#define PETSCMAT_DLL

/*
   Provides an interface to the MUMPS sparse solver
*/
#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include "zmumps_c.h"
#else
#include "dmumps_c.h"
#endif
EXTERN_C_END

/* MUMPS job codes: -1 initializes an instance, -2 terminates it */
#define JOB_INIT -1
#define JOB_END -2
/* macros s.t. indices match MUMPS documentation (MUMPS arrays are 1-based) */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Private data attached to a MUMPS factored matrix (stored in A->spptr). */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;              /* MUMPS solver instance (complex scalars) */
#else
  DMUMPS_STRUC_C id;              /* MUMPS solver instance (real scalars) */
#endif
  MatStructure   matstruc;        /* SAME/DIFFERENT_NONZERO_PATTERN: drives (re)analysis */
  PetscMPIInt    myid,size;       /* rank and size of the matrix communicator */
  PetscInt       *irn,*jcn,sym,nSolve;  /* triple row/col indices; symmetry flag; #solves done */
  PetscScalar    *val;            /* triple values */
  MPI_Comm       comm_mumps;      /* duplicated communicator handed to MUMPS */
  VecScatter     scat_rhs, scat_sol;    /* scatters for centralized rhs / distributed solution */
  PetscTruth     isAIJ,CleanUpMUMPS;    /* AIJ vs SBAIJ input; whether destroy must free MUMPS data */
  Vec            b_seq,x_seq;     /* sequential rhs on host; local solution piece */
  /* saved method pointers of the wrapped matrix type, restored on conversion back */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;

EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
EXTERN_C_BEGIN
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

/* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
/*
  input:
    A       - matrix in mpiaij or mpisbaij (bs=1) format
    shift   - 0: C style output triple; 1: Fortran style output triple. 
    valOnly - FALSE: spaces are allocated and values are set for the triple
              TRUE:  only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)
*/
PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
  PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
  PetscErrorCode ierr;
  PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
  PetscInt       *row,*col;
  PetscScalar    *av, *bv,*val;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  /* Grab CSR arrays of the diagonal (A) and off-diagonal (B) blocks.
     garray maps B's compressed column indices to global column indices. */
  if (mumps->isAIJ){
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
    Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;
  } else {
    Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
    Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
    Mat_SeqBAIJ  *bb=(Mat_SeqBAIJ*)(mat->B)->data;
    if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;
  }

  /* First call allocates the triple arrays; subsequent calls (valOnly) reuse them. */
  if (!valOnly){
    ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }
  *nnz = nz;

  /* Emit each row's entries in ascending global column order:
     B-entries left of the diagonal block, then A-entries, then the rest of B. */
  jj = 0; irow = rstart;
  for ( i=0; i<m; i++ ) {
    ajj = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];

    /* get jB, the starting local col index for the 2nd B-part */
    /* NOTE(review): ajj[0] is read unconditionally — assumes every local row of
       the diagonal block is non-empty (countA > 0); verify for very sparse rows. */
    colA_start = rstart + ajj[0]; /* the smallest col index for A */
    j=-1;
    do {
      j++;
      if (j == countB) break;
      jcol = garray[bjj[j]];
    } while (jcol < colA_start);
    jB = j;

    /* B-part, smaller col index */
    colA_start = rstart + ajj[0]; /* the smallest col index for A */
    for (j=0; j<jB; j++){
      jcol = garray[bjj[j]];
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = jcol + shift;
      }
      val[jj++] = *bv++;
    }
    /* A-part */
    for (j=0; j<countA; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = *av++;
    }
    /* B-part, larger col index */
    for (j=jB; j<countB; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = *bv++;
    }
    irow++;
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MUMPS_Base"
/* Convert a MUMPS shell matrix back to its base type: restore the saved
   method pointers, clear the registered conversion functions, and rename. */
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \
{
  PetscErrorCode ierr;
  Mat            B=*newmat;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
  void           (*f)(void);

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* restore the base type's methods that were saved at conversion time */
  B->ops->duplicate              = mumps->MatDuplicate;
  B->ops->view                   = mumps->MatView;
  B->ops->assemblyend            = mumps->MatAssemblyEnd;
  B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
  B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
  B->ops->destroy                = mumps->MatDestroy;

  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
  }

  /* remove the MUMPS conversion hooks registered on this object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MUMPS"
/* Destroy a MUMPS matrix: release MUMPS-side resources (if a factorization
   was performed), then chain to the type-specific and base destroy routines. */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);
  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    if (size > 1){
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
      ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    lu->id.job=JOB_END;   /* tell MUMPS to free its internal data */
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  specialdestroy = lu->specialdestroy;
  ierr = 
(*specialdestroy)(A);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_AIJMUMPS"
/* Type-specific destroy for AIJ-based MUMPS matrices: convert back to the
   plain seq/mpi AIJ type so the base destroy can run. */
PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SBAIJMUMPS"
/* Type-specific destroy for SBAIJ-based MUMPS matrices (see MatDestroy_AIJMUMPS). */
PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorInfo_MUMPS"
/* Print the MUMPS control parameters (ICNTL/CNTL) and statistics
   (INFO/INFOG/RINFO/RINFOG) of the factorization to an ASCII viewer. */
PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",lu->id.ICNTL(3));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scalling strategy): %d \n",lu->id.ICNTL(8));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
  /* error-analysis statistics are only available on the host when ICNTL(11)>0 */
  if (!lu->myid && lu->id.ICNTL(11)>0) {
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
  }
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
  /* ICNTL(15-17) not used */
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Shur complement info): %d \n",lu->id.ICNTL(19));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",lu->id.ICNTL(20));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",lu->id.ICNTL(21));CHKERRQ(ierr);

  ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",lu->id.CNTL(4));CHKERRQ(ierr);

  /* information local to each processor */
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);
  /*
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);
  */

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);

  if (!lu->myid){ /* information from the host */
    ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);

    ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MUMPS"
/* View the matrix via the saved base-type viewer, then append the MUMPS
   factorization info when the ASCII_INFO format is requested. */
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
  PetscErrorCode    ierr;
  PetscTruth        iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);

  PetscFunctionBegin;
  ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO){
      ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_AIJMUMPS"
/* Solve A x = b with the MUMPS factor: gather the rhs on the host
   (MUMPS requires a centralized rhs), run the MUMPS solve phase, then
   scatter the distributed solution back into x. */
PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            x_seq;
  IS             is_iden,is_petsc;
  VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  lu->id.nrhs = 1;
  x_seq = lu->b_seq;
  if (lu->size > 1){
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1: MUMPS overwrites the rhs with the solution, so copy b into x */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }
  if (lu->size == 1){
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  } else if (!lu->myid){
    ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
  }

  if (lu->size > 1){
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve){
      /* Create x_seq=sol_loc for repeated use; one buffer holds both the
         local solution values and their (Fortran) indices */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
      lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
      lu->id.lsol_loc = lsol_loc;
      lu->id.sol_loc = (F_DOUBLE *)sol_loc;
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
    }
  }

  /* solve phase */
/*-------------*/ 443 lu->id.job = 3; 444 #if defined(PETSC_USE_COMPLEX) 445 zmumps_c(&lu->id); 446 #else 447 dmumps_c(&lu->id); 448 #endif 449 if (lu->id.INFOG(1) < 0) { 450 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 451 } 452 453 if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */ 454 if (!lu->nSolve){ /* create scatter scat_sol */ 455 ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 456 for (i=0; i<lu->id.lsol_loc; i++){ 457 lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 458 } 459 ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr); /* to */ 460 ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr); 461 ierr = ISDestroy(is_iden);CHKERRQ(ierr); 462 ierr = ISDestroy(is_petsc);CHKERRQ(ierr); 463 } 464 ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr); 465 ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr); 466 } 467 lu->nSolve++; 468 PetscFunctionReturn(0); 469 } 470 471 /* 472 input: 473 F: numeric factor 474 output: 475 nneg: total number of negative pivots 476 nzero: 0 477 npos: (global dimension of F) - nneg 478 */ 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 482 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 483 { 484 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 485 PetscErrorCode ierr; 486 PetscMPIInt size; 487 488 PetscFunctionBegin; 489 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 490 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 491 if (size > 1 && lu->id.ICNTL(13) != 1){ 492 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 493 } 494 if (nneg){ 495 if (!lu->myid){ 496 *nneg = lu->id.INFOG(12); 497 } 498 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 499 } 500 if (nzero) *nzero = 0; 501 if (npos) *npos = F->rmap.N - (*nneg); 502 PetscFunctionReturn(0); 503 } 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 507 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 508 { 509 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 510 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 511 PetscErrorCode ierr; 512 PetscInt rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl; 513 PetscTruth valOnly,flg; 514 Mat F_diag; 515 516 PetscFunctionBegin; 517 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 518 (*F)->ops->solve = MatSolve_AIJMUMPS; 519 520 /* Initialize a MUMPS instance */ 521 ierr = MPI_Comm_rank(A->comm, &lu->myid); 522 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 523 lua->myid = lu->myid; lua->size = lu->size; 524 lu->id.job = JOB_INIT; 525 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 526 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 527 528 /* Set mumps options */ 529 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 530 lu->id.par=1; /* host participates factorizaton and solve */ 531 lu->id.sym=lu->sym; 532 if (lu->sym == 2){ 533 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 534 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 535 } 536 #if defined(PETSC_USE_COMPLEX) 537 zmumps_c(&lu->id); 538 #else 539 dmumps_c(&lu->id); 540 #endif 541 542 if (lu->size == 1){ 543 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 544 } else { 545 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 546 } 547 548 icntl=-1; 549 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing 
(0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 550 if ((flg && icntl > 0) || PetscLogPrintInfo) { 551 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 552 } else { /* no output */ 553 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 554 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 555 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 556 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 557 } 558 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 559 icntl=-1; 560 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 561 if (flg) { 562 if (icntl== 1){ 563 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 564 } else { 565 lu->id.ICNTL(7) = icntl; 566 } 567 } 568 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 
1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 569 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 570 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 571 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 572 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 573 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 574 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 575 576 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 577 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 579 PetscOptionsEnd(); 580 } 581 582 /* define matrix A */ 583 switch (lu->id.ICNTL(18)){ 584 case 0: /* centralized assembled matrix input (size=1) */ 585 if (!lu->myid) { 586 if (lua->isAIJ){ 587 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 588 nz = aa->nz; 589 ai = aa->i; aj = aa->j; lu->val = aa->a; 590 } else { 591 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 592 nz = aa->nz; 593 ai = aa->i; aj = aa->j; lu->val = aa->a; 594 } 595 if (lu->matstruc == 
DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
        ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
        ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
        nz = 0;
        for (i=0; i<M; i++){
          rnz = ai[i+1] - ai[i];
          while (rnz--) {  /* Fortran row/col index! */
            lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
          }
        }
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
      valOnly = PETSC_FALSE;
    } else {
      valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
    }
    /* shift=1: MUMPS wants Fortran-style (1-based) indices */
    ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
    break;
  default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
  }

  /* analysis phase */
  /*----------------*/
  if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
    lu->id.job = 1;   /* MUMPS job 1 = analysis */

    lu->id.n = M;
    switch (lu->id.ICNTL(18)){
    case 0:  /* centralized assembled matrix input */
      if (!lu->myid) {
        lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
        if (lu->id.ICNTL(6)>1){
          /* prescaling options >1 need the numerical values at analysis time */
#if defined(PETSC_USE_COMPLEX)
          lu->id.a = (mumps_double_complex*)lu->val;
#else
          lu->id.a = lu->val;
#endif
        }
      }
      break;
    case 3:  /* distributed assembled matrix input (size>1) */
      lu->id.nz_loc = nnz;
      lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
      if (lu->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
        lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
        lu->id.a_loc = lu->val;
#endif
      }
      /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      IS  is_iden;
      Vec b;
      if (!lu->myid){
        ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr);
      } else {
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
      }
      /* template mpi vector only used to build the scatter layout */
      ierr = VecCreate(A->comm,&b);CHKERRQ(ierr);
      ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr);
      ierr = VecSetFromOptions(b);CHKERRQ(ierr);

      ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = VecDestroy(b);CHKERRQ(ierr);
      break;
    }
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    if (lu->id.INFOG(1) < 0) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
    }
  }

  /* numerical factorization phase */
  /*-------------------------------*/
  lu->id.job = 2;   /* MUMPS job 2 = numeric factorization */
  if(!lu->id.ICNTL(18)) {   /* centralized input: values live on the host only */
    if (!lu->myid) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a = (mumps_double_complex*)lu->val;
#else
      lu->id.a = lu->val;
#endif
    }
  } else {                  /* distributed input: every process supplies its local values */
#if defined(PETSC_USE_COMPLEX)
    lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
    lu->id.a_loc = lu->val;
#endif
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    if (lu->id.INFO(1) == -13) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
    } else {
      SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
    }
  }

  /* NOTE(review): condition tests ICNTL(16) but the message prints INFOG(16) —
     looks inconsistent; confirm against the MUMPS users' guide before changing */
  if (!lu->myid && lu->id.ICNTL(16) > 0){
709 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 710 } 711 712 if (lu->size > 1){ 713 if ((*F)->factor == FACTOR_LU){ 714 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 715 } else { 716 F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A; 717 } 718 F_diag->assembled = PETSC_TRUE; 719 if (lu->nSolve){ 720 ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr); 721 ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr); 722 ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr); 723 } 724 } 725 (*F)->assembled = PETSC_TRUE; 726 lu->matstruc = SAME_NONZERO_PATTERN; 727 lu->CleanUpMUMPS = PETSC_TRUE; 728 lu->nSolve = 0; 729 PetscFunctionReturn(0); 730 } 731 732 /* Note the Petsc r and c permutations are ignored */ 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 735 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 736 Mat B; 737 Mat_MUMPS *lu; 738 PetscErrorCode ierr; 739 740 PetscFunctionBegin; 741 /* Create the factorization matrix */ 742 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 743 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 744 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 745 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 746 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 747 748 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 749 B->factor = FACTOR_LU; 750 lu = (Mat_MUMPS*)B->spptr; 751 lu->sym = 0; 752 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 753 754 *F = B; 755 PetscFunctionReturn(0); 756 } 757 758 /* Note the Petsc r permutation is ignored */ 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 761 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 762 Mat B; 763 Mat_MUMPS *lu; 764 PetscErrorCode ierr; 765 766 PetscFunctionBegin; 767 /* Create the factorization matrix */ 768 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 769 ierr = 
MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 770 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 771 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 772 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 773 774 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 775 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 776 B->factor = FACTOR_CHOLESKY; 777 lu = (Mat_MUMPS*)B->spptr; 778 lu->sym = 2; 779 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 780 781 *F = B; 782 PetscFunctionReturn(0); 783 } 784 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 787 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 788 PetscErrorCode ierr; 789 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 790 791 PetscFunctionBegin; 792 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 793 794 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 795 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 796 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 797 PetscFunctionReturn(0); 798 } 799 800 EXTERN_C_BEGIN 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 803 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 804 { 805 PetscErrorCode ierr; 806 PetscMPIInt size; 807 MPI_Comm comm; 808 Mat B=*newmat; 809 Mat_MUMPS *mumps; 810 811 PetscFunctionBegin; 812 if (reuse == MAT_INITIAL_MATRIX) { 813 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 814 } 815 816 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 817 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 818 819 mumps->MatDuplicate = A->ops->duplicate; 820 mumps->MatView = A->ops->view; 821 mumps->MatAssemblyEnd = A->ops->assemblyend; 822 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 823 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 824 mumps->MatDestroy = A->ops->destroy; 825 
mumps->specialdestroy = MatDestroy_AIJMUMPS;
  mumps->CleanUpMUMPS   = PETSC_FALSE;  /* no MUMPS instance started yet; nothing to tear down */
  mumps->isAIJ          = PETSC_TRUE;

  /* install the MUMPS overrides on B */
  B->spptr                 = (void*)mumps;
  B->ops->duplicate        = MatDuplicate_MUMPS;
  B->ops->view             = MatView_MUMPS;
  B->ops->assemblyend      = MatAssemblyEnd_AIJMUMPS;
  B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
  B->ops->destroy          = MatDestroy_MUMPS;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); /* NOTE(review): duplicated CHKERRQ is redundant (harmless) */
  if (size == 1) {
    /* register the conversions to/from the sequential base type under the composed-function names */
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
                                             "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
                                             "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
  } else {
    /* register the conversions to/from the parallel base type */
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
                                             "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
                                             "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
  }

  ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
  MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
  and sequential matrices via the external package MUMPS.

  If MUMPS is installed (see the manual for instructions
  on how to declare the existence of external packages),
  a matrix type can be constructed which invokes MUMPS solvers.
  After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).

  If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
  Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators,
  MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity. One can also call MatConvert for an inplace
  conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,1,2,3,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_cntl_1 <delta> - relative pivoting threshold
.
-mat_mumps_cntl_2 <tol> - stopping criterion for refinement 888 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 889 890 Level: beginner 891 892 .seealso: MATSBAIJMUMPS 893 M*/ 894 895 EXTERN_C_BEGIN 896 #undef __FUNCT__ 897 #define __FUNCT__ "MatCreate_AIJMUMPS" 898 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 899 { 900 PetscErrorCode ierr; 901 PetscMPIInt size; 902 MPI_Comm comm; 903 904 PetscFunctionBegin; 905 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 906 /* and AIJMUMPS types */ 907 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 908 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 909 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 910 if (size == 1) { 911 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 912 } else { 913 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 914 /* 915 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 916 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 917 */ 918 } 919 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 920 PetscFunctionReturn(0); 921 } 922 EXTERN_C_END 923 924 #undef __FUNCT__ 925 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 926 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 927 { 928 PetscErrorCode ierr; 929 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 930 931 PetscFunctionBegin; 932 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 933 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 934 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 935 PetscFunctionReturn(0); 936 } 937 938 EXTERN_C_BEGIN 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 941 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 942 { 943 Mat A; 944 
Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 945 PetscErrorCode ierr; 946 947 PetscFunctionBegin; 948 /* 949 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 950 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 951 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 952 block size info so that PETSc can determine the local size properly. The block size info is set 953 in the preallocation routine. 954 */ 955 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 956 A = ((Mat_MPISBAIJ *)B->data)->A; 957 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 EXTERN_C_END 961 962 EXTERN_C_BEGIN 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 965 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 966 { 967 PetscErrorCode ierr; 968 PetscMPIInt size; 969 MPI_Comm comm; 970 Mat B=*newmat; 971 Mat_MUMPS *mumps; 972 void (*f)(void); 973 974 PetscFunctionBegin; 975 if (reuse == MAT_INITIAL_MATRIX) { 976 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 977 } 978 979 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 980 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 981 982 mumps->MatDuplicate = A->ops->duplicate; 983 mumps->MatView = A->ops->view; 984 mumps->MatAssemblyEnd = A->ops->assemblyend; 985 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 986 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 987 mumps->MatDestroy = A->ops->destroy; 988 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 989 mumps->CleanUpMUMPS = PETSC_FALSE; 990 mumps->isAIJ = PETSC_FALSE; 991 992 B->spptr = (void*)mumps; 993 B->ops->duplicate = MatDuplicate_MUMPS; 994 B->ops->view = MatView_MUMPS; 995 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 996 
B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 997 B->ops->destroy = MatDestroy_MUMPS; 998 999 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1000 if (size == 1) { 1001 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 1002 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1003 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 1004 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1005 } else { 1006 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 1007 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 1008 if (f) { /* This case should always be true when this routine is called */ 1009 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 1010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1011 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 1012 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 1013 } 1014 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 1015 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1016 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 1017 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1018 } 1019 1020 ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr); 1021 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 1022 *newmat = B; 1023 PetscFunctionReturn(0); 1024 } 1025 EXTERN_C_END 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatDuplicate_MUMPS" 1029 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 1030 PetscErrorCode ierr; 1031 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 1032 1033 PetscFunctionBegin; 1034 ierr = 
(*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 1035 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 /*MC 1040 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 1041 distributed and sequential matrices via the external package MUMPS. 1042 1043 If MUMPS is installed (see the manual for instructions 1044 on how to declare the existence of external packages), 1045 a matrix type can be constructed which invokes MUMPS solvers. 1046 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 1047 1048 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 1049 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 1050 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 1051 for communicators controlling multiple processes. It is recommended that you call both of 1052 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 1053 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 1054 without data copy. 1055 1056 Options Database Keys: 1057 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 1058 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 1059 . -mat_mumps_icntl_4 <0,...,4> - print level 1060 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 1061 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 1062 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 1063 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 1064 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 1065 . 
-mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 1066 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 1067 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 1068 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 1069 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 1070 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 1071 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 1072 1073 Level: beginner 1074 1075 .seealso: MATAIJMUMPS 1076 M*/ 1077 1078 EXTERN_C_BEGIN 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1081 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1082 { 1083 PetscErrorCode ierr; 1084 PetscMPIInt size; 1085 1086 PetscFunctionBegin; 1087 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 1088 /* and SBAIJMUMPS types */ 1089 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 1090 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1091 if (size == 1) { 1092 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1093 } else { 1094 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1095 } 1096 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 EXTERN_C_END 1100