1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the MUMPS sparse solver 5 */ 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "zmumps_c.h" 14 #else 15 #include "dmumps_c.h" 16 #endif 17 EXTERN_C_END 18 #define JOB_INIT -1 19 #define JOB_END -2 20 /* macros s.t. indices match MUMPS documentation */ 21 #define ICNTL(I) icntl[(I)-1] 22 #define CNTL(I) cntl[(I)-1] 23 #define INFOG(I) infog[(I)-1] 24 #define INFO(I) info[(I)-1] 25 #define RINFOG(I) rinfog[(I)-1] 26 #define RINFO(I) rinfo[(I)-1] 27 28 typedef struct { 29 #if defined(PETSC_USE_COMPLEX) 30 ZMUMPS_STRUC_C id; 31 #else 32 DMUMPS_STRUC_C id; 33 #endif 34 MatStructure matstruc; 35 PetscMPIInt myid,size; 36 PetscInt *irn,*jcn,sym,nSolve; 37 PetscScalar *val; 38 MPI_Comm comm_mumps; 39 VecScatter scat_rhs, scat_sol; 40 PetscTruth isAIJ,CleanUpMUMPS; 41 Vec b_seq,x_seq; 42 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 43 PetscErrorCode (*MatView)(Mat,PetscViewer); 44 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 PetscErrorCode (*specialdestroy)(Mat); 49 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 50 } Mat_MUMPS; 51 52 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 53 EXTERN_C_BEGIN 54 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*); 55 EXTERN_C_END 56 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 57 /* 58 input: 59 A - matrix in mpiaij or mpisbaij (bs=1) format 60 shift - 0: C style output triple; 1: Fortran style output triple. 61 valOnly - FALSE: spaces are allocated and values are set for the triple 62 TRUE: only the values in v array are updated 63 output: 64 nnz - dim of r, c, and v (number of local nonzero entries of A) 65 r, c, v - row and col index, matrix values (matrix triples) 66 */ 67 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 68 PetscInt *ai, *aj, *bi, *bj, rstart,nz, *garray; 69 PetscErrorCode ierr; 70 PetscInt i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol; 71 PetscInt *row,*col; 72 PetscScalar *av, *bv,*val; 73 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 74 75 PetscFunctionBegin; 76 if (mumps->isAIJ){ 77 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 78 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 79 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 80 nz = aa->nz + bb->nz; 81 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 82 garray = mat->garray; 83 av=aa->a; bv=bb->a; 84 85 } else { 86 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 87 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 88 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 89 if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs); 90 nz = aa->nz + bb->nz; 91 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 92 garray = mat->garray; 93 av=aa->a; bv=bb->a; 94 } 95 96 if (!valOnly){ 97 ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr); 98 ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr); 99 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 100 *r = row; *c = col; *v = val; 101 } else { 102 row = *r; col = *c; val = *v; 103 } 104 *nnz = nz; 105 106 jj = 0; irow = rstart; 107 for ( i=0; i<m; i++ ) { 108 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 109 countA = ai[i+1] - ai[i]; 110 countB = bi[i+1] - bi[i]; 111 bjj = bj + bi[i]; 112 113 /* get jB, the starting local col index for the 2nd B-part */ 114 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 115 j=-1; 116 do { 117 j++; 118 if (j == countB) break; 119 jcol = garray[bjj[j]]; 120 } while (jcol < colA_start); 121 jB = j; 122 123 /* B-part, smaller col index */ 124 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 125 for (j=0; j<jB; j++){ 126 jcol = garray[bjj[j]]; 127 if (!valOnly){ 128 row[jj] = irow + shift; col[jj] = jcol + shift; 129 130 } 131 val[jj++] = *bv++; 132 } 133 /* A-part */ 134 for (j=0; j<countA; j++){ 135 if (!valOnly){ 136 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 137 } 138 val[jj++] = *av++; 139 } 140 /* B-part, larger col index */ 141 for (j=jB; j<countB; j++){ 142 if (!valOnly){ 143 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 144 } 145 val[jj++] = *bv++; 146 } 147 irow++; 148 } 149 150 PetscFunctionReturn(0); 151 } 152 153 EXTERN_C_BEGIN 154 #undef __FUNCT__ 155 #define __FUNCT__ "MatConvert_MUMPS_Base" 156 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 157 { 158 PetscErrorCode ierr; 159 Mat B=*newmat; 160 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 161 void (*f)(void); 162 163 PetscFunctionBegin; 164 if (reuse == MAT_INITIAL_MATRIX) { 165 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 166 } 167 B->ops->duplicate = mumps->MatDuplicate; 168 B->ops->view = mumps->MatView; 169 B->ops->assemblyend = mumps->MatAssemblyEnd; 170 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 171 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 172 B->ops->destroy = mumps->MatDestroy; 173 174 /* put back original composed preallocation function */ 175 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 176 if (f) { 177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr); 178 } 179 180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 186 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 187 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 188 189 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 190 *newmat = B; 191 PetscFunctionReturn(0); 192 } 193 EXTERN_C_END 194 195 #undef __FUNCT__ 196 #define __FUNCT__ "MatDestroy_MUMPS" 197 PetscErrorCode MatDestroy_MUMPS(Mat A) 198 { 199 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 200 PetscErrorCode ierr; 201 PetscMPIInt size=lu->size; 202 PetscErrorCode (*specialdestroy)(Mat); 203 PetscFunctionBegin; 204 if (lu->CleanUpMUMPS) { 205 /* Terminate instance, deallocate memories */ 206 if (size > 1){ 207 ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr); 208 ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr); 209 ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr); 210 ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr); 211 ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr); 212 ierr = PetscFree(lu->val);CHKERRQ(ierr); 213 } 214 lu->id.job=JOB_END; 215 #if defined(PETSC_USE_COMPLEX) 216 zmumps_c(&lu->id); 217 #else 218 dmumps_c(&lu->id); 219 #endif 220 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 221 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 222 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 223 } 224 specialdestroy = lu->specialdestroy; 225 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 226 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 227 PetscFunctionReturn(0); 228 } 229 230 #undef __FUNCT__ 231 #define __FUNCT__ "MatDestroy_AIJMUMPS" 232 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 233 { 234 PetscErrorCode ierr; 235 PetscMPIInt size; 236 237 PetscFunctionBegin; 238 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 239 if (size==1) { 240 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 241 } else { 242 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 243 } 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 249 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 250 { 251 PetscErrorCode ierr; 252 PetscMPIInt size; 253 254 PetscFunctionBegin; 255 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 256 if (size==1) { 257 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 258 } else { 259 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 260 } 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatFactorInfo_MUMPS" 266 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 267 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 268 PetscErrorCode ierr; 269 270 PetscFunctionBegin; 271 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 272 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 273 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 274 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",lu->id.ICNTL(1));CHKERRQ(ierr); 275 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr); 276 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",lu->id.ICNTL(3));CHKERRQ(ierr); 277 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 278 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 279 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 280 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 281 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scalling strategy): %d \n",lu->id.ICNTL(8));CHKERRQ(ierr); 282 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 283 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 284 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 285 if (!lu->myid && lu->id.ICNTL(11)>0) { 286 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 287 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 288 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 289 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 290 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 291 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 292 293 } 294 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 295 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 296 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 297 /* ICNTL(15-17) not used */ 298 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 299 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Shur complement info): %d \n",lu->id.ICNTL(19));CHKERRQ(ierr); 300 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",lu->id.ICNTL(20));CHKERRQ(ierr); 301 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",lu->id.ICNTL(21));CHKERRQ(ierr); 302 303 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 304 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 305 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 306 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",lu->id.CNTL(4));CHKERRQ(ierr); 307 308 /* infomation local to each processor */ 309 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);} 310 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 311 ierr = PetscSynchronizedFlush(A->comm); 312 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);} 313 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 314 ierr = PetscSynchronizedFlush(A->comm); 315 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);} 316 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 317 ierr = PetscSynchronizedFlush(A->comm); 318 /* 319 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);} 320 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr); 321 ierr = PetscSynchronizedFlush(A->comm); 322 */ 323 324 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);} 325 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr); 326 ierr = PetscSynchronizedFlush(A->comm); 327 328 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);} 329 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr); 330 ierr = PetscSynchronizedFlush(A->comm); 331 332 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);} 333 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr); 334 ierr = PetscSynchronizedFlush(A->comm); 335 336 if (!lu->myid){ /* information from the host */ 337 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 338 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 339 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 340 341 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 342 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 343 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 344 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 345 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 346 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 348 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 349 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 351 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 352 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 353 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 354 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 355 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 356 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 357 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 358 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 359 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr); 360 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr); 361 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr); 362 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr); 363 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr); 364 } 365 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_MUMPS" 371 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) { 372 PetscErrorCode ierr; 373 PetscTruth iascii; 374 PetscViewerFormat format; 375 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 376 377 PetscFunctionBegin; 378 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 379 380 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 381 if (iascii) { 382 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 383 if (format == PETSC_VIEWER_ASCII_INFO){ 384 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 385 } 386 } 387 PetscFunctionReturn(0); 388 } 389 390 #undef __FUNCT__ 391 #define __FUNCT__ "MatSolve_AIJMUMPS" 392 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 393 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 394 PetscScalar *array; 395 Vec x_seq; 396 IS is_iden,is_petsc; 397 VecScatter scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol; 398 PetscErrorCode ierr; 399 PetscInt i; 400 401 PetscFunctionBegin; 402 lu->id.nrhs = 1; 403 x_seq = lu->b_seq; 404 if (lu->size > 1){ 405 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 406 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr); 407 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr); 408 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 409 } else { /* size == 1 */ 410 ierr = VecCopy(b,x);CHKERRQ(ierr); 411 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 412 } 413 if (!lu->myid) { /* define rhs on the host */ 414 #if defined(PETSC_USE_COMPLEX) 415 lu->id.rhs = (mumps_double_complex*)array; 416 #else 417 lu->id.rhs = array; 418 #endif 419 } 420 if (lu->size == 1){ 421 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 422 } else if (!lu->myid){ 423 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 424 } 425 426 if (lu->size > 1){ 427 /* distributed solution */ 428 lu->id.ICNTL(21) = 1; 429 if (!lu->nSolve){ 430 /* Create x_seq=sol_loc for repeated use */ 431 PetscInt lsol_loc; 432 PetscScalar *sol_loc; 433 lsol_loc = lu->id.INFO(23); /* length of sol_loc */ 434 ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr); 435 lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc); 436 lu->id.lsol_loc = lsol_loc; 437 lu->id.sol_loc = (F_DOUBLE *)sol_loc; 438 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr); 439 } 440 } 441 442 /* solve phase */ 443 /*-------------*/ 444 lu->id.job = 3; 445 #if defined(PETSC_USE_COMPLEX) 446 zmumps_c(&lu->id); 447 #else 448 dmumps_c(&lu->id); 449 #endif 450 if (lu->id.INFOG(1) < 0) { 451 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 452 } 453 454 if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */ 455 if (!lu->nSolve){ /* create scatter scat_sol */ 456 ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 457 for (i=0; i<lu->id.lsol_loc; i++){ 458 lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 459 } 460 ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr); /* to */ 461 ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr); 462 ierr = ISDestroy(is_iden);CHKERRQ(ierr); 463 ierr = ISDestroy(is_petsc);CHKERRQ(ierr); 464 } 465 ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr); 466 ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr); 467 } 468 lu->nSolve++; 469 PetscFunctionReturn(0); 470 } 471 472 /* 473 input: 474 F: numeric factor 475 output: 476 nneg: total number of negative pivots 477 nzero: 0 478 npos: (global dimension of F) - nneg 479 */ 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 483 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 484 { 485 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 486 PetscErrorCode ierr; 487 PetscMPIInt size; 488 489 PetscFunctionBegin; 490 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 491 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 492 if (size > 1 && lu->id.ICNTL(13) != 1){ 493 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 494 } 495 if (nneg){ 496 if (!lu->myid){ 497 *nneg = lu->id.INFOG(12); 498 } 499 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 500 } 501 if (nzero) *nzero = 0; 502 if (npos) *npos = F->rmap.N - (*nneg); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 508 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 509 { 510 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 511 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 512 PetscErrorCode ierr; 513 PetscInt rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl; 514 PetscTruth valOnly,flg; 515 Mat F_diag; 516 517 PetscFunctionBegin; 518 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 519 (*F)->ops->solve = MatSolve_AIJMUMPS; 520 521 /* Initialize a MUMPS instance */ 522 ierr = MPI_Comm_rank(A->comm, &lu->myid); 523 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 524 lua->myid = lu->myid; lua->size = lu->size; 525 lu->id.job = JOB_INIT; 526 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 527 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 528 529 /* Set mumps options */ 530 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 531 lu->id.par=1; /* host participates factorizaton and solve */ 532 lu->id.sym=lu->sym; 533 if (lu->sym == 2){ 534 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 535 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 536 } 537 #if defined(PETSC_USE_COMPLEX) 538 zmumps_c(&lu->id); 539 #else 540 dmumps_c(&lu->id); 541 #endif 542 543 if (lu->size == 1){ 544 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 545 } else { 546 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 547 } 548 549 icntl=-1; 550 lu->id.ICNTL(4) = 0; /* level of printing; overwrite mumps default ICNTL(4)=2 */ 551 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 552 if ((flg && icntl > 0) || PetscLogPrintInfo) { 553 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 554 } else { /* no output */ 555 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 556 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 557 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 558 } 559 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 560 icntl=-1; 561 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 562 if (flg) { 563 if (icntl== 1){ 564 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 565 } else { 566 lu->id.ICNTL(7) = icntl; 567 } 568 } 569 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 570 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 571 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 572 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 573 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 574 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 576 577 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 579 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 580 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);CHKERRQ(ierr); 581 PetscOptionsEnd(); 582 } 583 584 /* define matrix A */ 585 switch (lu->id.ICNTL(18)){ 586 case 0: /* centralized assembled matrix input (size=1) */ 587 if (!lu->myid) { 588 if (lua->isAIJ){ 589 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 590 nz = aa->nz; 591 ai = aa->i; aj = aa->j; lu->val = aa->a; 592 } else { 593 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 594 nz = aa->nz; 595 ai = aa->i; aj = aa->j; lu->val = aa->a; 596 } 597 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 598 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr); 599 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr); 600 nz = 0; 601 for (i=0; i<M; i++){ 602 rnz = ai[i+1] - ai[i]; 603 while (rnz--) { /* Fortran row/col index! */ 604 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 605 } 606 } 607 } 608 } 609 break; 610 case 3: /* distributed assembled matrix input (size>1) */ 611 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 612 valOnly = PETSC_FALSE; 613 } else { 614 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 615 } 616 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 617 break; 618 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 619 } 620 621 /* analysis phase */ 622 /*----------------*/ 623 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 624 lu->id.job = 1; 625 626 lu->id.n = M; 627 switch (lu->id.ICNTL(18)){ 628 case 0: /* centralized assembled matrix input */ 629 if (!lu->myid) { 630 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 631 if (lu->id.ICNTL(6)>1){ 632 #if defined(PETSC_USE_COMPLEX) 633 lu->id.a = (mumps_double_complex*)lu->val; 634 #else 635 lu->id.a = lu->val; 636 #endif 637 } 638 } 639 break; 640 case 3: /* distributed assembled matrix input (size>1) */ 641 lu->id.nz_loc = nnz; 642 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 643 if (lu->id.ICNTL(6)>1) { 644 #if defined(PETSC_USE_COMPLEX) 645 lu->id.a_loc = (mumps_double_complex*)lu->val; 646 #else 647 lu->id.a_loc = lu->val; 648 #endif 649 } 650 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 651 IS is_iden; 652 Vec b; 653 if (!lu->myid){ 654 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr); 655 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr); 656 } else { 657 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr); 658 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 659 } 660 ierr = VecCreate(A->comm,&b);CHKERRQ(ierr); 661 ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = VecSetFromOptions(b);CHKERRQ(ierr); 663 664 ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr); 665 ierr = ISDestroy(is_iden);CHKERRQ(ierr); 666 ierr = VecDestroy(b);CHKERRQ(ierr); 667 break; 668 } 669 #if defined(PETSC_USE_COMPLEX) 670 zmumps_c(&lu->id); 671 #else 672 dmumps_c(&lu->id); 673 #endif 674 if (lu->id.INFOG(1) < 0) { 675 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 676 } 677 } 678 679 /* numerical factorization phase */ 680 /*-------------------------------*/ 681 lu->id.job = 2; 682 if(!lu->id.ICNTL(18)) { 683 if (!lu->myid) { 684 #if defined(PETSC_USE_COMPLEX) 685 lu->id.a = (mumps_double_complex*)lu->val; 686 #else 687 lu->id.a = lu->val; 688 #endif 689 } 690 } else { 691 #if defined(PETSC_USE_COMPLEX) 692 lu->id.a_loc = (mumps_double_complex*)lu->val; 693 #else 694 lu->id.a_loc = lu->val; 695 #endif 696 } 697 #if defined(PETSC_USE_COMPLEX) 698 zmumps_c(&lu->id); 699 #else 700 dmumps_c(&lu->id); 701 #endif 702 if (lu->id.INFOG(1) < 0) { 703 if (lu->id.INFO(1) == -13) { 704 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2)); 705 } else { 706 SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 707 } 708 } 709 710 if (!lu->myid && lu->id.ICNTL(16) > 0){ 711 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 712 } 713 714 if (lu->size > 1){ 715 if ((*F)->factor == FACTOR_LU){ 716 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 717 } else { 718 F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A; 719 } 720 F_diag->assembled = PETSC_TRUE; 721 if (lu->nSolve){ 722 ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr); 723 ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr); 724 ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr); 725 } 726 } 727 (*F)->assembled = PETSC_TRUE; 728 lu->matstruc = SAME_NONZERO_PATTERN; 729 lu->CleanUpMUMPS = PETSC_TRUE; 730 lu->nSolve = 0; 731 PetscFunctionReturn(0); 732 } 733 734 /* Note the Petsc r and c permutations are ignored */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 737 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 738 Mat B; 739 Mat_MUMPS *lu; 740 PetscErrorCode ierr; 741 742 PetscFunctionBegin; 743 /* Create the factorization matrix */ 744 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 745 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 746 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 747 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 748 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 749 750 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 751 B->factor = FACTOR_LU; 752 lu = (Mat_MUMPS*)B->spptr; 753 lu->sym = 0; 754 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 755 756 *F = B; 757 PetscFunctionReturn(0); 758 } 759 760 /* Note the Petsc r permutation is ignored */ 761 #undef __FUNCT__ 762 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 763 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 764 Mat B; 765 Mat_MUMPS *lu; 766 PetscErrorCode ierr; 767 768 PetscFunctionBegin; 769 /* Create the factorization matrix */ 770 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 771 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 772 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 773 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 774 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 775 776 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 777 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 778 B->factor = FACTOR_CHOLESKY; 779 lu = (Mat_MUMPS*)B->spptr; 780 lu->sym = 2; 781 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 782 783 *F = B; 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 789 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 790 PetscErrorCode ierr; 791 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 792 793 PetscFunctionBegin; 794 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 795 796 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 797 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 798 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 799 PetscFunctionReturn(0); 800 } 801 802 EXTERN_C_BEGIN 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 805 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 806 { 807 PetscErrorCode ierr; 808 PetscMPIInt size; 809 MPI_Comm comm; 810 Mat B=*newmat; 811 Mat_MUMPS *mumps; 812 813 PetscFunctionBegin; 814 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 815 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 816 817 if (reuse == MAT_INITIAL_MATRIX) { 818 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 819 /* A may have special container that is not duplicated, 820 e.g., A is obtainted from MatMatMult(,&A). Save B->ops instead */ 821 mumps->MatDuplicate = B->ops->duplicate; 822 mumps->MatView = B->ops->view; 823 mumps->MatAssemblyEnd = B->ops->assemblyend; 824 mumps->MatLUFactorSymbolic = B->ops->lufactorsymbolic; 825 mumps->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic; 826 mumps->MatDestroy = B->ops->destroy; 827 } else { 828 mumps->MatDuplicate = A->ops->duplicate; 829 mumps->MatView = A->ops->view; 830 mumps->MatAssemblyEnd = A->ops->assemblyend; 831 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 832 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 833 mumps->MatDestroy = A->ops->destroy; 834 } 835 mumps->specialdestroy = MatDestroy_AIJMUMPS; 836 mumps->CleanUpMUMPS = PETSC_FALSE; 837 mumps->isAIJ = PETSC_TRUE; 838 839 B->spptr = (void*)mumps; 840 B->ops->duplicate = MatDuplicate_MUMPS; 841 B->ops->view = MatView_MUMPS; 842 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 843 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 844 B->ops->destroy = MatDestroy_MUMPS; 845 846 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 847 if (size == 1) { 848 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 849 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 851 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 852 } else { 853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 854 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 856 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 857 } 858 859 ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr); 860 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 861 *newmat = B; 862 PetscFunctionReturn(0); 863 } 864 EXTERN_C_END 865 866 /*MC 867 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 868 and sequential matrices via the external package MUMPS. 869 870 If MUMPS is installed (see the manual for instructions 871 on how to declare the existence of external packages), 872 a matrix type can be constructed which invokes MUMPS solvers. 873 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 874 875 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 876 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 877 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 878 for communicators controlling multiple processes. It is recommended that you call both of 879 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 880 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 881 without data copy. 882 883 Options Database Keys: 884 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 885 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 886 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 887 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 888 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 889 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 890 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 891 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 892 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 893 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 894 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 895 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 896 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 897 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 898 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 899 900 Level: beginner 901 902 .seealso: MATSBAIJMUMPS 903 M*/ 904 905 EXTERN_C_BEGIN 906 #undef __FUNCT__ 907 #define __FUNCT__ "MatCreate_AIJMUMPS" 908 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 909 { 910 PetscErrorCode ierr; 911 PetscMPIInt size; 912 913 PetscFunctionBegin; 914 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 915 if (size == 1) { 916 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 917 } else { 918 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 919 /* 920 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 921 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 922 */ 923 } 924 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 EXTERN_C_END 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 931 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 932 { 933 PetscErrorCode ierr; 934 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 935 936 PetscFunctionBegin; 937 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 938 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 939 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 940 PetscFunctionReturn(0); 941 } 942 943 EXTERN_C_BEGIN 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 946 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 947 { 948 Mat A; 949 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 /* 954 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 955 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 956 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 957 block size info so that PETSc can determine the local size properly. The block size info is set 958 in the preallocation routine. 959 */ 960 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 961 A = ((Mat_MPISBAIJ *)B->data)->A; 962 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 EXTERN_C_END 966 967 EXTERN_C_BEGIN 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 970 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 971 { 972 PetscErrorCode ierr; 973 PetscMPIInt size; 974 MPI_Comm comm; 975 Mat B=*newmat; 976 Mat_MUMPS *mumps; 977 void (*f)(void); 978 979 PetscFunctionBegin; 980 if (reuse == MAT_INITIAL_MATRIX) { 981 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 982 } 983 984 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 985 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 986 987 mumps->MatDuplicate = A->ops->duplicate; 988 mumps->MatView = A->ops->view; 989 mumps->MatAssemblyEnd = A->ops->assemblyend; 990 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 991 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 992 mumps->MatDestroy = A->ops->destroy; 993 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 994 mumps->CleanUpMUMPS = PETSC_FALSE; 995 mumps->isAIJ = PETSC_FALSE; 996 997 B->spptr = (void*)mumps; 998 B->ops->duplicate = MatDuplicate_MUMPS; 999 B->ops->view = MatView_MUMPS; 1000 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 1001 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 1002 B->ops->destroy = MatDestroy_MUMPS; 1003 1004 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1005 if (size == 1) { 1006 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 1007 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1008 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 1009 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1010 } else { 1011 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 1012 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 1013 if (f) { /* This case should always be true when this routine is called */ 1014 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 1015 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1016 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 1017 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 1018 } 1019 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 1020 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 1022 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1023 } 1024 1025 ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr); 1026 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 1027 *newmat = B; 1028 PetscFunctionReturn(0); 1029 } 1030 EXTERN_C_END 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatDuplicate_MUMPS" 1034 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 1035 PetscErrorCode ierr; 1036 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 1037 1038 PetscFunctionBegin; 1039 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 1040 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 /*MC 1045 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 1046 distributed and sequential matrices via the external package MUMPS. 1047 1048 If MUMPS is installed (see the manual for instructions 1049 on how to declare the existence of external packages), 1050 a matrix type can be constructed which invokes MUMPS solvers. 1051 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 1052 1053 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 1054 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 1055 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 1056 for communicators controlling multiple processes. It is recommended that you call both of 1057 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 1058 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 1059 without data copy. 1060 1061 Options Database Keys: 1062 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 1063 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 1064 . -mat_mumps_icntl_4 <0,...,4> - print level 1065 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 1066 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 1067 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 1068 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 1069 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 1070 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 1071 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 1072 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 1073 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 1074 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 1075 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 1076 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 1077 1078 Level: beginner 1079 1080 .seealso: MATAIJMUMPS 1081 M*/ 1082 1083 EXTERN_C_BEGIN 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1086 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1087 { 1088 PetscErrorCode ierr; 1089 PetscMPIInt size; 1090 1091 PetscFunctionBegin; 1092 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1093 if (size == 1) { 1094 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1095 } else { 1096 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1097 } 1098 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 EXTERN_C_END 1102