#define PETSCMAT_DLL

/*
   Provides an interface to the MUMPS sparse solver
*/
#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include "zmumps_c.h"
#else
#include "dmumps_c.h"
#endif
EXTERN_C_END
/* MUMPS job codes: -1 initializes an instance, -2 terminates it */
#define JOB_INIT -1
#define JOB_END -2
/* macros s.t. indices match MUMPS documentation (MUMPS arrays are 1-based) */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Private data stored in A->spptr for matrices of type *MUMPS.
   Holds the MUMPS instance plus the original Mat methods that the
   MUMPS type overrides, so they can be restored on conversion back. */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;            /* MUMPS control/info structure (complex) */
#else
  DMUMPS_STRUC_C id;            /* MUMPS control/info structure (real) */
#endif
  MatStructure matstruc;        /* SAME/DIFFERENT_NONZERO_PATTERN between factorizations */
  PetscMPIInt myid,size;        /* rank and size of the matrix communicator */
  PetscInt *irn,*jcn,sym,nSolve;/* triple row/col indices; symmetry flag; solve counter */
  PetscScalar *val;             /* triple values */
  MPI_Comm comm_mumps;          /* duplicated communicator handed to MUMPS */
  VecScatter scat_rhs, scat_sol;/* scatters for centralized rhs / distributed solution */
  PetscTruth isAIJ,CleanUpMUMPS;/* input format flag; whether a MUMPS instance needs teardown */
  Vec b_seq,x_seq;              /* sequential rhs on the host; local solution wrapper */
  /* saved original methods, restored by MatConvert_MUMPS_Base() */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;

EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
EXTERN_C_BEGIN
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END
/* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
/*
 input:
   A       - matrix in mpiaij or mpisbaij (bs=1) format
   shift   - 0: C style output triple; 1: Fortran style output triple.
valOnly - FALSE: spaces are allocated and values are set for the triple
             TRUE:  only the values in v array are updated
 output:
   nnz     - dim of r, c, and v (number of local nonzero entries of A)
   r, c, v - row and col index, matrix values (matrix triples)
 */
PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
  PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
  PetscErrorCode ierr;
  PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
  PetscInt       *row,*col;
  PetscScalar    *av, *bv,*val;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  /* Pull out the diagonal (A) and off-diagonal (B) CSR arrays of the
     parallel matrix; garray maps B's compressed column indices to
     global column numbers. */
  if (mumps->isAIJ){
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
    Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;
  } else {
    Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
    Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
    Mat_SeqBAIJ  *bb=(Mat_SeqBAIJ*)(mat->B)->data;
    if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;
  }

  if (!valOnly){
    /* first call: allocate the triple arrays and hand ownership to the caller */
    ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
    *r = row; *c = col; *v = val;
  } else {
    /* repeated call with same nonzero pattern: reuse the caller's arrays */
    row = *r; col = *c; val = *v;
  }
  *nnz = nz;

  /* Emit each local row in globally ascending column order:
     B-part entries left of the diagonal block, then the A-part,
     then B-part entries right of the diagonal block.  av/bv are
     consumed strictly sequentially across rows. */
  jj = 0; irow = rstart;
  for ( i=0; i<m; i++ ) {
    ajj = aj + ai[i];            /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];

    /* get jB, the starting local col index for the 2nd B-part */
    colA_start = rstart + ajj[0]; /* the smallest col index for A */
    j=-1;
    do {
      j++;
      if (j == countB) break;
      jcol = garray[bjj[j]];
    } while (jcol < colA_start);
    jB = j;

    /* B-part, smaller col index */
    colA_start = rstart + ajj[0]; /* the smallest col index for A (recomputed; redundant but harmless) */
    for (j=0; j<jB; j++){
      jcol = garray[bjj[j]];
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = jcol + shift;
      }
      val[jj++] = *bv++;
    }
    /* A-part */
    for (j=0; j<countA; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = *av++;
    }
    /* B-part, larger col index */
    for (j=jB; j<countB; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = *bv++;
    }
    irow++;
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MUMPS_Base"
/* Convert a *MUMPS matrix back to its base (AIJ/SBAIJ) type by restoring
   the method table saved in Mat_MUMPS and clearing composed conversions. */
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B=*newmat;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
  void           (*f)(void);

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* restore the original method table */
  B->ops->duplicate              = mumps->MatDuplicate;
  B->ops->view                   = mumps->MatView;
  B->ops->assemblyend            = mumps->MatAssemblyEnd;
  B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
  B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
  B->ops->destroy                = mumps->MatDestroy;

  /* put back original composed preallocation function */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
  }

  /* remove the MUMPS conversion hooks composed on this object */
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MUMPS"
/* Destroy a *MUMPS matrix: terminate the MUMPS instance and free the
   interface's scratch storage, then chain to the type-specific
   specialdestroy (which converts back to the base type) and the base
   destroy routine. */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);
  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    if (size > 1){
      /* parallel-only objects created for centralized rhs / distributed solution */
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
      ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    lu->id.job=JOB_END;  /* tell MUMPS to release all its internal memory */
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  /* copy the pointer first: specialdestroy converts A back to its base
     type, which replaces A->spptr */
  specialdestroy = lu->specialdestroy;
  ierr = (*specialdestroy)(A);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_AIJMUMPS"
/* specialdestroy for the AIJ flavor: restore the seq/mpi AIJ type */
PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SBAIJMUMPS"
/* specialdestroy for the SBAIJ flavor: restore the seq/mpi SBAIJ type */
PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorInfo_MUMPS"
/* Print the MUMPS control parameters and statistics to an ASCII viewer;
   called from MatView_MUMPS for PETSC_VIEWER_ASCII_INFO. */
PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) 
(output for global info): %d \n",lu->id.ICNTL(3));CHKERRQ(ierr); 277 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 278 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 279 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 280 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 281 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scalling strategy): %d \n",lu->id.ICNTL(8));CHKERRQ(ierr); 282 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 283 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 284 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 285 if (!lu->myid && lu->id.ICNTL(11)>0) { 286 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 287 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 288 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 289 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 290 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 291 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 292 293 } 294 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 295 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 296 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 297 /* ICNTL(15-17) not used */ 298 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 299 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Shur complement info): %d \n",lu->id.ICNTL(19));CHKERRQ(ierr); 300 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",lu->id.ICNTL(20));CHKERRQ(ierr); 301 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",lu->id.ICNTL(21));CHKERRQ(ierr); 302 303 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 304 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 305 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 306 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",lu->id.CNTL(4));CHKERRQ(ierr); 307 308 /* infomation local to each processor */ 309 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);} 310 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 311 ierr = PetscSynchronizedFlush(A->comm); 312 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);} 313 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 314 ierr = PetscSynchronizedFlush(A->comm); 315 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);} 316 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 317 ierr = 
PetscSynchronizedFlush(A->comm); 318 /* 319 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);} 320 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr); 321 ierr = PetscSynchronizedFlush(A->comm); 322 */ 323 324 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);} 325 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr); 326 ierr = PetscSynchronizedFlush(A->comm); 327 328 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);} 329 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr); 330 ierr = PetscSynchronizedFlush(A->comm); 331 332 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);} 333 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr); 334 ierr = PetscSynchronizedFlush(A->comm); 335 336 if (!lu->myid){ /* information from the host */ 337 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 338 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 339 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 340 341 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 342 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer 
workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 343 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 344 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 345 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 346 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 348 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 349 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 351 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 352 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 353 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 354 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 355 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) 
(estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 356 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 357 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 358 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 359 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr); 360 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr); 361 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr); 362 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr); 363 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr); 364 } 365 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_MUMPS" 371 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) { 372 PetscErrorCode ierr; 373 PetscTruth iascii; 374 PetscViewerFormat format; 375 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 376 377 PetscFunctionBegin; 378 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 379 380 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 381 if (iascii) { 
ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO){
      ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_AIJMUMPS"
/* Solve A x = b using the MUMPS factors stored in A->spptr.
   In parallel, b is first gathered to the host (MUMPS centralized rhs);
   the distributed solution returned by MUMPS (ICNTL(21)=1) is scattered
   back into the PETSc layout of x. */
PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            x_seq;      /* NOTE(review): despite the name this holds lu->b_seq, the sequential rhs */
  IS             is_iden,is_petsc;
  VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  lu->id.nrhs = 1;  /* one right-hand side per solve */
  x_seq = lu->b_seq;
  if (lu->size > 1){
    /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */
    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1 */
    /* uniprocessor: MUMPS overwrites the rhs with the solution, so copy b
       into x and hand x's array to MUMPS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }
  if (lu->size == 1){
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  } else if (!lu->myid){
    ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
  }

  if (lu->size > 1){
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve){
      /* Create x_seq=sol_loc for repeated use; one allocation carries both
         the scalar values and the trailing index array isol_loc */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
      lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
      lu->id.lsol_loc = lsol_loc;
      lu->id.sol_loc = (F_DOUBLE *)sol_loc;
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
    }
  }

  /* solve phase */
  /*-------------*/
  lu->id.job = 3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (!lu->nSolve){ /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<lu->id.lsol_loc; i++){
        lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(is_petsc);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
    ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
  }
  lu->nSolve++;
  PetscFunctionReturn(0);
}

/*
 input:
   F:       numeric factor
 output:
   nneg:    total number of negative pivots
   nzero:   0
   npos:    (global dimension of F) - nneg
*/

#undef __FUNCT__
#define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix.
Set ICNTL(13)=1 to skip ScaLAPACK */ 492 if (size > 1 && lu->id.ICNTL(13) != 1){ 493 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 494 } 495 if (nneg){ 496 if (!lu->myid){ 497 *nneg = lu->id.INFOG(12); 498 } 499 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 500 } 501 if (nzero) *nzero = 0; 502 if (npos) *npos = F->rmap.N - (*nneg); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 508 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 509 { 510 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 511 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 512 PetscErrorCode ierr; 513 PetscInt rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl; 514 PetscTruth valOnly,flg; 515 Mat F_diag; 516 517 PetscFunctionBegin; 518 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 519 (*F)->ops->solve = MatSolve_AIJMUMPS; 520 521 /* Initialize a MUMPS instance */ 522 ierr = MPI_Comm_rank(A->comm, &lu->myid); 523 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 524 lua->myid = lu->myid; lua->size = lu->size; 525 lu->id.job = JOB_INIT; 526 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 527 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 528 529 /* Set mumps options */ 530 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 531 lu->id.par=1; /* host participates factorizaton and solve */ 532 lu->id.sym=lu->sym; 533 if (lu->sym == 2){ 534 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 535 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 536 } 537 #if defined(PETSC_USE_COMPLEX) 538 zmumps_c(&lu->id); 539 #else 540 dmumps_c(&lu->id); 541 #endif 542 543 if (lu->size == 1){ 544 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 545 } else { 546 lu->id.ICNTL(18) = 3; /* distributed 
assembled matrix input */ 547 } 548 549 icntl=-1; 550 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 551 if ((flg && icntl > 0) || PetscLogPrintInfo) { 552 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 553 } else { /* no output */ 554 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 555 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 556 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 557 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 558 } 559 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 560 icntl=-1; 561 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 562 if (flg) { 563 if (icntl== 1){ 564 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 565 } else { 566 lu->id.ICNTL(7) = icntl; 567 } 568 } 569 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 
1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 570 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 571 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 572 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 573 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 574 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 576 577 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 579 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 580 PetscOptionsEnd(); 581 } 582 583 /* define matrix A */ 584 switch (lu->id.ICNTL(18)){ 585 case 0: /* centralized assembled matrix input (size=1) */ 586 if (!lu->myid) { 587 if (lua->isAIJ){ 588 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 589 nz = aa->nz; 590 ai = aa->i; aj = aa->j; lu->val = aa->a; 591 } else { 592 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 593 nz = aa->nz; 594 ai = aa->i; aj = aa->j; lu->val = aa->a; 595 } 596 if (lu->matstruc == 
DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 597 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr); 598 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr); 599 nz = 0; 600 for (i=0; i<M; i++){ 601 rnz = ai[i+1] - ai[i]; 602 while (rnz--) { /* Fortran row/col index! */ 603 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 604 } 605 } 606 } 607 } 608 break; 609 case 3: /* distributed assembled matrix input (size>1) */ 610 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 611 valOnly = PETSC_FALSE; 612 } else { 613 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 614 } 615 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 616 break; 617 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 618 } 619 620 /* analysis phase */ 621 /*----------------*/ 622 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 623 lu->id.job = 1; 624 625 lu->id.n = M; 626 switch (lu->id.ICNTL(18)){ 627 case 0: /* centralized assembled matrix input */ 628 if (!lu->myid) { 629 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 630 if (lu->id.ICNTL(6)>1){ 631 #if defined(PETSC_USE_COMPLEX) 632 lu->id.a = (mumps_double_complex*)lu->val; 633 #else 634 lu->id.a = lu->val; 635 #endif 636 } 637 } 638 break; 639 case 3: /* distributed assembled matrix input (size>1) */ 640 lu->id.nz_loc = nnz; 641 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 642 if (lu->id.ICNTL(6)>1) { 643 #if defined(PETSC_USE_COMPLEX) 644 lu->id.a_loc = (mumps_double_complex*)lu->val; 645 #else 646 lu->id.a_loc = lu->val; 647 #endif 648 } 649 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 650 IS is_iden; 651 Vec b; 652 if (!lu->myid){ 653 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr); 654 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr); 655 } else { 656 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr); 657 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 658 } 659 ierr = VecCreate(A->comm,&b);CHKERRQ(ierr); 660 ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr); 661 ierr = VecSetFromOptions(b);CHKERRQ(ierr); 662 663 ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr); 664 ierr = ISDestroy(is_iden);CHKERRQ(ierr); 665 ierr = VecDestroy(b);CHKERRQ(ierr); 666 break; 667 } 668 #if defined(PETSC_USE_COMPLEX) 669 zmumps_c(&lu->id); 670 #else 671 dmumps_c(&lu->id); 672 #endif 673 if (lu->id.INFOG(1) < 0) { 674 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 675 } 676 } 677 678 /* numerical factorization phase */ 679 /*-------------------------------*/ 680 lu->id.job = 2; 681 if(!lu->id.ICNTL(18)) { 682 if (!lu->myid) { 683 #if defined(PETSC_USE_COMPLEX) 684 lu->id.a = (mumps_double_complex*)lu->val; 685 #else 686 lu->id.a = lu->val; 687 #endif 688 } 689 } else { 690 #if defined(PETSC_USE_COMPLEX) 691 lu->id.a_loc = (mumps_double_complex*)lu->val; 692 #else 693 lu->id.a_loc = lu->val; 694 #endif 695 } 696 #if defined(PETSC_USE_COMPLEX) 697 zmumps_c(&lu->id); 698 #else 699 dmumps_c(&lu->id); 700 #endif 701 if (lu->id.INFOG(1) < 0) { 702 if (lu->id.INFO(1) == -13) { 703 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2)); 704 } else { 705 SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 706 } 707 } 708 709 if (!lu->myid && lu->id.ICNTL(16) > 0){ 
710 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 711 } 712 713 if (lu->size > 1){ 714 if ((*F)->factor == FACTOR_LU){ 715 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 716 } else { 717 F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A; 718 } 719 F_diag->assembled = PETSC_TRUE; 720 if (lu->nSolve){ 721 ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr); 722 ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr); 723 ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr); 724 } 725 } 726 (*F)->assembled = PETSC_TRUE; 727 lu->matstruc = SAME_NONZERO_PATTERN; 728 lu->CleanUpMUMPS = PETSC_TRUE; 729 lu->nSolve = 0; 730 PetscFunctionReturn(0); 731 } 732 733 /* Note the Petsc r and c permutations are ignored */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 736 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 737 Mat B; 738 Mat_MUMPS *lu; 739 PetscErrorCode ierr; 740 741 PetscFunctionBegin; 742 /* Create the factorization matrix */ 743 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 744 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 745 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 746 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 747 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 748 749 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 750 B->factor = FACTOR_LU; 751 lu = (Mat_MUMPS*)B->spptr; 752 lu->sym = 0; 753 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 754 755 *F = B; 756 PetscFunctionReturn(0); 757 } 758 759 /* Note the Petsc r permutation is ignored */ 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 762 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 763 Mat B; 764 Mat_MUMPS *lu; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 /* Create the factorization matrix */ 769 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 770 ierr = 
MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 771 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 772 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 773 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 774 775 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 776 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 777 B->factor = FACTOR_CHOLESKY; 778 lu = (Mat_MUMPS*)B->spptr; 779 lu->sym = 2; 780 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 781 782 *F = B; 783 PetscFunctionReturn(0); 784 } 785 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 788 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 789 PetscErrorCode ierr; 790 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 791 792 PetscFunctionBegin; 793 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 794 795 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 796 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 797 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 798 PetscFunctionReturn(0); 799 } 800 801 EXTERN_C_BEGIN 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 804 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 805 { 806 PetscErrorCode ierr; 807 PetscMPIInt size; 808 MPI_Comm comm; 809 Mat B=*newmat; 810 Mat_MUMPS *mumps; 811 812 PetscFunctionBegin; 813 if (reuse == MAT_INITIAL_MATRIX) { 814 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 815 } 816 817 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 818 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 819 820 mumps->MatDuplicate = A->ops->duplicate; 821 mumps->MatView = A->ops->view; 822 mumps->MatAssemblyEnd = A->ops->assemblyend; 823 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 824 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 825 mumps->MatDestroy = A->ops->destroy; 826 
mumps->specialdestroy = MatDestroy_AIJMUMPS; 827 mumps->CleanUpMUMPS = PETSC_FALSE; 828 mumps->isAIJ = PETSC_TRUE; 829 830 B->spptr = (void*)mumps; 831 B->ops->duplicate = MatDuplicate_MUMPS; 832 B->ops->view = MatView_MUMPS; 833 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 834 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 835 B->ops->destroy = MatDestroy_MUMPS; 836 837 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 838 if (size == 1) { 839 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 840 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 842 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 843 } else { 844 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 845 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 847 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 848 } 849 850 ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr); 851 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 852 *newmat = B; 853 PetscFunctionReturn(0); 854 } 855 EXTERN_C_END 856 857 /*MC 858 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 859 and sequential matrices via the external package MUMPS. 860 861 If MUMPS is installed (see the manual for instructions 862 on how to declare the existence of external packages), 863 a matrix type can be constructed which invokes MUMPS solvers. 864 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 865 866 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 867 Otherwise, this matrix type inherits from MATMPIAIJ. 
Hence for single process communicators, 868 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 869 for communicators controlling multiple processes. It is recommended that you call both of 870 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 871 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 872 without data copy. 873 874 Options Database Keys: 875 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 876 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 877 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 878 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 879 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 880 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 881 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 882 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 883 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 884 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 885 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 886 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 887 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 888 . 
-mat_mumps_cntl_2 <tol> - stopping criterion for refinement 889 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 890 891 Level: beginner 892 893 .seealso: MATSBAIJMUMPS 894 M*/ 895 896 EXTERN_C_BEGIN 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatCreate_AIJMUMPS" 899 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 900 { 901 PetscErrorCode ierr; 902 PetscMPIInt size; 903 904 PetscFunctionBegin; 905 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 906 if (size == 1) { 907 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 908 } else { 909 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 910 /* 911 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 912 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 913 */ 914 } 915 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 EXTERN_C_END 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 922 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 923 { 924 PetscErrorCode ierr; 925 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 926 927 PetscFunctionBegin; 928 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 929 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 930 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 931 PetscFunctionReturn(0); 932 } 933 934 EXTERN_C_BEGIN 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 937 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 938 { 939 Mat A; 940 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 941 PetscErrorCode ierr; 942 943 PetscFunctionBegin; 944 /* 945 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 946 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. 
I would 947 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 948 block size info so that PETSc can determine the local size properly. The block size info is set 949 in the preallocation routine. 950 */ 951 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 952 A = ((Mat_MPISBAIJ *)B->data)->A; 953 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 EXTERN_C_END 957 958 EXTERN_C_BEGIN 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 961 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 962 { 963 PetscErrorCode ierr; 964 PetscMPIInt size; 965 MPI_Comm comm; 966 Mat B=*newmat; 967 Mat_MUMPS *mumps; 968 void (*f)(void); 969 970 PetscFunctionBegin; 971 if (reuse == MAT_INITIAL_MATRIX) { 972 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 973 } 974 975 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 976 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 977 978 mumps->MatDuplicate = A->ops->duplicate; 979 mumps->MatView = A->ops->view; 980 mumps->MatAssemblyEnd = A->ops->assemblyend; 981 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 982 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 983 mumps->MatDestroy = A->ops->destroy; 984 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 985 mumps->CleanUpMUMPS = PETSC_FALSE; 986 mumps->isAIJ = PETSC_FALSE; 987 988 B->spptr = (void*)mumps; 989 B->ops->duplicate = MatDuplicate_MUMPS; 990 B->ops->view = MatView_MUMPS; 991 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 992 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 993 B->ops->destroy = MatDestroy_MUMPS; 994 995 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 996 if (size == 1) { 997 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 998 
"MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 999 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 1000 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1001 } else { 1002 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 1003 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 1004 if (f) { /* This case should always be true when this routine is called */ 1005 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 1006 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1007 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 1008 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 1009 } 1010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 1011 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 1013 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1014 } 1015 1016 ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr); 1017 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 1018 *newmat = B; 1019 PetscFunctionReturn(0); 1020 } 1021 EXTERN_C_END 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatDuplicate_MUMPS" 1025 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 1026 PetscErrorCode ierr; 1027 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 1028 1029 PetscFunctionBegin; 1030 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 1031 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 1032 PetscFunctionReturn(0); 1033 } 1034 1035 /*MC 1036 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 1037 distributed and 
sequential matrices via the external package MUMPS. 1038 1039 If MUMPS is installed (see the manual for instructions 1040 on how to declare the existence of external packages), 1041 a matrix type can be constructed which invokes MUMPS solvers. 1042 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 1043 1044 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 1045 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 1046 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 1047 for communicators controlling multiple processes. It is recommended that you call both of 1048 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 1049 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 1050 without data copy. 1051 1052 Options Database Keys: 1053 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 1054 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 1055 . -mat_mumps_icntl_4 <0,...,4> - print level 1056 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 1057 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 1058 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 1059 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 1060 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 1061 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 1062 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 1063 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 1064 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 1065 . 
-mat_mumps_cntl_1 <delta> - relative pivoting threshold 1066 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 1067 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 1068 1069 Level: beginner 1070 1071 .seealso: MATAIJMUMPS 1072 M*/ 1073 1074 EXTERN_C_BEGIN 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1077 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1078 { 1079 PetscErrorCode ierr; 1080 PetscMPIInt size; 1081 1082 PetscFunctionBegin; 1083 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1084 if (size == 1) { 1085 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1086 } else { 1087 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1088 } 1089 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 EXTERN_C_END 1093