1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the MUMPS sparse solver 5 */ 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "zmumps_c.h" 14 #else 15 #include "dmumps_c.h" 16 #endif 17 EXTERN_C_END 18 #define JOB_INIT -1 19 #define JOB_END -2 20 /* macros s.t. indices match MUMPS documentation */ 21 #define ICNTL(I) icntl[(I)-1] 22 #define CNTL(I) cntl[(I)-1] 23 #define INFOG(I) infog[(I)-1] 24 #define INFO(I) info[(I)-1] 25 #define RINFOG(I) rinfog[(I)-1] 26 #define RINFO(I) rinfo[(I)-1] 27 28 typedef struct { 29 #if defined(PETSC_USE_COMPLEX) 30 ZMUMPS_STRUC_C id; 31 #else 32 DMUMPS_STRUC_C id; 33 #endif 34 MatStructure matstruc; 35 PetscMPIInt myid,size; 36 PetscInt *irn,*jcn,sym,nSolve; 37 PetscScalar *val; 38 MPI_Comm comm_mumps; 39 VecScatter scat_rhs, scat_sol; 40 PetscTruth isAIJ,CleanUpMUMPS; 41 Vec b_seq,x_seq; 42 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 43 PetscErrorCode (*MatView)(Mat,PetscViewer); 44 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 PetscErrorCode (*specialdestroy)(Mat); 49 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 50 } Mat_MUMPS; 51 52 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 53 EXTERN_C_BEGIN 54 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*); 55 EXTERN_C_END 56 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 57 /* 58 input: 59 A - matrix in mpiaij or mpisbaij (bs=1) format 60 shift - 0: C style output triple; 1: Fortran style output triple. 61 valOnly - FALSE: spaces are allocated and values are set for the triple 62 TRUE: only the values in v array are updated 63 output: 64 nnz - dim of r, c, and v (number of local nonzero entries of A) 65 r, c, v - row and col index, matrix values (matrix triples) 66 */ 67 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 68 PetscInt *ai, *aj, *bi, *bj, rstart,nz, *garray; 69 PetscErrorCode ierr; 70 PetscInt i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol; 71 PetscInt *row,*col; 72 PetscScalar *av, *bv,*val; 73 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 74 75 PetscFunctionBegin; 76 if (mumps->isAIJ){ 77 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 78 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 79 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 80 nz = aa->nz + bb->nz; 81 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 82 garray = mat->garray; 83 av=aa->a; bv=bb->a; 84 85 } else { 86 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 87 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 88 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 89 if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs); 90 nz = aa->nz + bb->nz; 91 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 92 garray = mat->garray; 93 av=aa->a; bv=bb->a; 94 } 95 96 if (!valOnly){ 97 ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr); 98 ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr); 99 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 100 *r = row; *c = col; *v = val; 101 } else { 102 row = *r; col = *c; val = *v; 103 } 104 *nnz = nz; 105 106 jj = 0; irow = rstart; 107 for ( i=0; i<m; i++ ) { 108 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 109 countA = ai[i+1] - ai[i]; 110 countB = bi[i+1] - bi[i]; 111 bjj = bj + bi[i]; 112 113 /* get jB, the starting local col index for the 2nd B-part */ 114 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 115 j=-1; 116 do { 117 j++; 118 if (j == countB) break; 119 jcol = garray[bjj[j]]; 120 } while (jcol < colA_start); 121 jB = j; 122 123 /* B-part, smaller col index */ 124 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 125 for (j=0; j<jB; j++){ 126 jcol = garray[bjj[j]]; 127 if (!valOnly){ 128 row[jj] = irow + shift; col[jj] = jcol + shift; 129 130 } 131 val[jj++] = *bv++; 132 } 133 /* A-part */ 134 for (j=0; j<countA; j++){ 135 if (!valOnly){ 136 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 137 } 138 val[jj++] = *av++; 139 } 140 /* B-part, larger col index */ 141 for (j=jB; j<countB; j++){ 142 if (!valOnly){ 143 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 144 } 145 val[jj++] = *bv++; 146 } 147 irow++; 148 } 149 150 PetscFunctionReturn(0); 151 } 152 153 EXTERN_C_BEGIN 154 #undef __FUNCT__ 155 #define __FUNCT__ "MatConvert_MUMPS_Base" 156 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \ 157 { 158 PetscErrorCode ierr; 159 Mat B=*newmat; 160 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 161 void (*f)(void); 162 163 PetscFunctionBegin; 164 if (reuse == MAT_INITIAL_MATRIX) { 165 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 166 } 167 B->ops->duplicate = mumps->MatDuplicate; 168 B->ops->view = mumps->MatView; 169 B->ops->assemblyend = mumps->MatAssemblyEnd; 170 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 171 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 172 B->ops->destroy = mumps->MatDestroy; 173 174 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 175 if (f) { 176 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr); 177 } 178 179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 186 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 187 188 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 189 *newmat = B; 190 PetscFunctionReturn(0); 191 } 192 EXTERN_C_END 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatDestroy_MUMPS" 196 PetscErrorCode MatDestroy_MUMPS(Mat A) 197 { 198 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 199 PetscErrorCode ierr; 200 PetscMPIInt size=lu->size; 201 PetscErrorCode (*specialdestroy)(Mat); 202 PetscFunctionBegin; 203 if (lu->CleanUpMUMPS) { 204 /* Terminate instance, deallocate memories */ 205 if (size > 1){ 206 ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr); 207 ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr); 208 ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr); 209 ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr); 210 ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr); 211 ierr = PetscFree(lu->val);CHKERRQ(ierr); 212 } 213 lu->id.job=JOB_END; 214 #if defined(PETSC_USE_COMPLEX) 215 zmumps_c(&lu->id); 216 #else 217 dmumps_c(&lu->id); 218 #endif 219 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 220 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 221 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 222 } 223 specialdestroy = lu->specialdestroy; 224 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 225 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 229 #undef __FUNCT__ 230 #define __FUNCT__ "MatDestroy_AIJMUMPS" 231 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 232 { 233 PetscErrorCode ierr; 234 PetscMPIInt size; 235 236 PetscFunctionBegin; 237 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 238 if (size==1) { 239 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 240 } else { 241 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 242 } 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 248 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 249 { 250 PetscErrorCode ierr; 251 PetscMPIInt size; 252 253 PetscFunctionBegin; 254 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 255 if (size==1) { 256 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 257 } else { 258 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 259 } 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatFactorInfo_MUMPS" 265 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 266 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 267 PetscErrorCode ierr; 268 269 PetscFunctionBegin; 270 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 271 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 272 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 273 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 274 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 275 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 276 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 277 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 278 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 279 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 280 if (!lu->myid && lu->id.ICNTL(11)>0) { 281 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 282 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 283 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 284 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 285 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 286 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 287 288 } 289 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 290 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 292 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 293 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 294 295 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 296 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 297 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 298 299 /* infomation local to each processor */ 300 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 301 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 302 ierr = PetscSynchronizedFlush(A->comm); 303 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 304 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 305 ierr = PetscSynchronizedFlush(A->comm); 306 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 307 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 308 ierr = PetscSynchronizedFlush(A->comm); 309 310 if (!lu->myid){ /* information from the host */ 311 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 312 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 313 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 314 315 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 316 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 317 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 318 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 319 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 320 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 321 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 322 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 323 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 324 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 325 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 326 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 327 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 328 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 329 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 330 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 331 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 332 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 333 } 334 335 PetscFunctionReturn(0); 336 } 337 338 #undef __FUNCT__ 339 #define __FUNCT__ "MatView_MUMPS" 340 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) { 341 PetscErrorCode ierr; 342 PetscTruth iascii; 343 PetscViewerFormat format; 344 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 345 346 PetscFunctionBegin; 347 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 348 349 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 350 if (iascii) { 351 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 352 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 353 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 354 } 355 } 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatSolve_AIJMUMPS" 361 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 362 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 363 PetscScalar *array; 364 Vec x_seq; 365 IS is_iden,is_petsc; 366 VecScatter scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol; 367 PetscErrorCode ierr; 368 PetscInt i; 369 370 PetscFunctionBegin; 371 lu->id.nrhs = 1; 372 x_seq = lu->b_seq; 373 if (lu->size > 1){ 374 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 375 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr); 376 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr); 377 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 378 } else { /* size == 1 */ 379 ierr = VecCopy(b,x);CHKERRQ(ierr); 380 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 381 } 382 if (!lu->myid) { /* define rhs on the host */ 383 #if defined(PETSC_USE_COMPLEX) 384 lu->id.rhs = (mumps_double_complex*)array; 385 #else 386 lu->id.rhs = array; 387 #endif 388 } 389 if (lu->size == 1){ 390 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 391 } else if (!lu->myid){ 392 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 393 } 394 395 if (lu->size > 1){ 396 /* distributed solution */ 397 lu->id.ICNTL(21) = 1; 398 if (!lu->nSolve){ 399 /* Create x_seq=sol_loc for repeated use */ 400 PetscInt lsol_loc; 401 PetscScalar *sol_loc; 402 lsol_loc = lu->id.INFO(23); /* length of sol_loc */ 403 ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr); 404 lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc); 405 lu->id.lsol_loc = lsol_loc; 406 lu->id.sol_loc = sol_loc; 407 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr); 408 } 409 } 410 411 /* solve phase */ 412 /*-------------*/ 413 lu->id.job = 3; 414 #if defined(PETSC_USE_COMPLEX) 415 zmumps_c(&lu->id); 416 #else 417 dmumps_c(&lu->id); 418 #endif 419 if (lu->id.INFOG(1) < 0) { 420 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 421 } 422 423 if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */ 424 if (!lu->nSolve){ /* create scatter scat_sol */ 425 ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 426 for (i=0; i<lu->id.lsol_loc; i++){ 427 lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 428 } 429 ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr); /* to */ 430 ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr); 431 ierr = ISDestroy(is_iden);CHKERRQ(ierr); 432 ierr = ISDestroy(is_petsc);CHKERRQ(ierr); 433 } 434 ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr); 435 ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr); 436 } 437 lu->nSolve++; 438 PetscFunctionReturn(0); 439 } 440 441 /* 442 input: 443 F: numeric factor 444 output: 445 nneg: total number of negative pivots 446 nzero: 0 447 npos: (global dimension of F) - nneg 448 */ 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 452 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 453 { 454 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 455 PetscErrorCode ierr; 456 PetscMPIInt size; 457 458 PetscFunctionBegin; 459 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 460 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 461 if (size > 1 && lu->id.ICNTL(13) != 1){ 462 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 463 } 464 if (nneg){ 465 if (!lu->myid){ 466 *nneg = lu->id.INFOG(12); 467 } 468 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 469 } 470 if (nzero) *nzero = 0; 471 if (npos) *npos = F->rmap.N - (*nneg); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 477 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 478 { 479 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 480 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 481 PetscErrorCode ierr; 482 PetscInt rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl; 483 PetscTruth valOnly,flg; 484 Mat F_diag; 485 486 PetscFunctionBegin; 487 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 488 (*F)->ops->solve = MatSolve_AIJMUMPS; 489 490 /* Initialize a MUMPS instance */ 491 ierr = MPI_Comm_rank(A->comm, &lu->myid); 492 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 493 lua->myid = lu->myid; lua->size = lu->size; 494 lu->id.job = JOB_INIT; 495 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 496 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 497 498 /* Set mumps options */ 499 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 500 lu->id.par=1; /* host participates factorizaton and solve */ 501 lu->id.sym=lu->sym; 502 if (lu->sym == 2){ 503 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 504 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 505 } 506 #if defined(PETSC_USE_COMPLEX) 507 zmumps_c(&lu->id); 508 #else 509 dmumps_c(&lu->id); 510 #endif 511 512 if (lu->size == 1){ 513 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 514 } else { 515 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 516 } 517 518 icntl=-1; 519 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 520 if ((flg && icntl > 0) || PetscLogPrintInfo) { 521 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 522 } else { /* no output */ 523 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 524 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 525 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 526 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 527 } 528 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 529 icntl=-1; 530 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 531 if (flg) { 532 if (icntl== 1){ 533 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 534 } else { 535 lu->id.ICNTL(7) = icntl; 536 } 537 } 538 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 539 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 540 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 541 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 542 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 543 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 544 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 545 546 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 547 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 548 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 549 PetscOptionsEnd(); 550 } 551 552 /* define matrix A */ 553 switch (lu->id.ICNTL(18)){ 554 case 0: /* centralized assembled matrix input (size=1) */ 555 if (!lu->myid) { 556 if (lua->isAIJ){ 557 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 558 nz = aa->nz; 559 ai = aa->i; aj = aa->j; lu->val = aa->a; 560 } else { 561 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 562 nz = aa->nz; 563 ai = aa->i; aj = aa->j; lu->val = aa->a; 564 } 565 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 566 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr); 567 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr); 568 nz = 0; 569 for (i=0; i<M; i++){ 570 rnz = ai[i+1] - ai[i]; 571 while (rnz--) { /* Fortran row/col index! */ 572 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 573 } 574 } 575 } 576 } 577 break; 578 case 3: /* distributed assembled matrix input (size>1) */ 579 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 580 valOnly = PETSC_FALSE; 581 } else { 582 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 583 } 584 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 585 break; 586 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 587 } 588 589 /* analysis phase */ 590 /*----------------*/ 591 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 592 lu->id.job = 1; 593 594 lu->id.n = M; 595 switch (lu->id.ICNTL(18)){ 596 case 0: /* centralized assembled matrix input */ 597 if (!lu->myid) { 598 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 599 if (lu->id.ICNTL(6)>1){ 600 #if defined(PETSC_USE_COMPLEX) 601 lu->id.a = (mumps_double_complex*)lu->val; 602 #else 603 lu->id.a = lu->val; 604 #endif 605 } 606 } 607 break; 608 case 3: /* distributed assembled matrix input (size>1) */ 609 lu->id.nz_loc = nnz; 610 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 611 if (lu->id.ICNTL(6)>1) { 612 #if defined(PETSC_USE_COMPLEX) 613 lu->id.a_loc = (mumps_double_complex*)lu->val; 614 #else 615 lu->id.a_loc = lu->val; 616 #endif 617 } 618 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 619 IS is_iden; 620 Vec b; 621 if (!lu->myid){ 622 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr); 623 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr); 624 } else { 625 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr); 626 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 627 } 628 ierr = VecCreate(A->comm,&b);CHKERRQ(ierr); 629 ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr); 630 ierr = VecSetFromOptions(b);CHKERRQ(ierr); 631 632 ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr); 633 ierr = ISDestroy(is_iden);CHKERRQ(ierr); 634 ierr = VecDestroy(b);CHKERRQ(ierr); 635 break; 636 } 637 #if defined(PETSC_USE_COMPLEX) 638 zmumps_c(&lu->id); 639 #else 640 dmumps_c(&lu->id); 641 #endif 642 if (lu->id.INFOG(1) < 0) { 643 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 644 } 645 } 646 647 /* numerical factorization phase */ 648 /*-------------------------------*/ 649 lu->id.job = 2; 650 if(!lu->id.ICNTL(18)) { 651 if (!lu->myid) { 652 #if defined(PETSC_USE_COMPLEX) 653 lu->id.a = (mumps_double_complex*)lu->val; 654 #else 655 lu->id.a = lu->val; 656 #endif 657 } 658 } else { 659 #if defined(PETSC_USE_COMPLEX) 660 lu->id.a_loc = (mumps_double_complex*)lu->val; 661 #else 662 lu->id.a_loc = lu->val; 663 #endif 664 } 665 #if defined(PETSC_USE_COMPLEX) 666 zmumps_c(&lu->id); 667 #else 668 dmumps_c(&lu->id); 669 #endif 670 if (lu->id.INFOG(1) < 0) { 671 if (lu->id.INFO(1) == -13) { 672 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2)); 673 } else { 674 SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 675 } 676 } 677 678 if (!lu->myid && lu->id.ICNTL(16) > 0){ 679 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 680 } 681 682 if (lu->size > 1){ 683 if ((*F)->factor == FACTOR_LU){ 684 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 685 } else { 686 F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A; 687 } 688 F_diag->assembled = PETSC_TRUE; 689 if (lu->nSolve){ 690 ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr); 691 ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr); 692 ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr); 693 } 694 } 695 (*F)->assembled = PETSC_TRUE; 696 lu->matstruc = SAME_NONZERO_PATTERN; 697 lu->CleanUpMUMPS = PETSC_TRUE; 698 lu->nSolve = 0; 699 PetscFunctionReturn(0); 700 } 701 702 /* Note the Petsc r and c permutations are ignored */ 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 705 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 706 Mat B; 707 Mat_MUMPS *lu; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 /* Create the factorization matrix */ 712 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 713 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 714 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 715 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 716 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 717 718 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 719 B->factor = FACTOR_LU; 720 lu = (Mat_MUMPS*)B->spptr; 721 lu->sym = 0; 722 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 723 724 *F = B; 725 PetscFunctionReturn(0); 726 } 727 728 /* Note the Petsc r permutation is ignored */ 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 731 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 732 Mat B; 733 Mat_MUMPS *lu; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 /* Create the factorization matrix */ 738 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 739 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 740 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 741 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 742 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 743 744 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 745 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 746 B->factor = FACTOR_CHOLESKY; 747 lu = (Mat_MUMPS*)B->spptr; 748 lu->sym = 2; 749 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 750 751 *F = B; 752 PetscFunctionReturn(0); 753 } 754 755 #undef __FUNCT__ 756 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 757 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 758 PetscErrorCode ierr; 759 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 760 761 PetscFunctionBegin; 762 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 763 764 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 765 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 766 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 767 PetscFunctionReturn(0); 768 } 769 770 EXTERN_C_BEGIN 771 #undef __FUNCT__ 772 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 773 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 774 { 775 PetscErrorCode ierr; 776 PetscMPIInt size; 777 MPI_Comm comm; 778 Mat B=*newmat; 779 Mat_MUMPS *mumps; 780 781 PetscFunctionBegin; 782 if (reuse == MAT_INITIAL_MATRIX) { 783 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 784 } 785 786 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 787 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 788 789 mumps->MatDuplicate = A->ops->duplicate; 790 mumps->MatView = A->ops->view; 791 mumps->MatAssemblyEnd = A->ops->assemblyend; 792 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 793 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 794 mumps->MatDestroy = A->ops->destroy; 795 mumps->specialdestroy = MatDestroy_AIJMUMPS; 796 mumps->CleanUpMUMPS = PETSC_FALSE; 797 mumps->isAIJ = PETSC_TRUE; 798 799 B->spptr = (void*)mumps; 800 B->ops->duplicate = MatDuplicate_MUMPS; 801 B->ops->view = MatView_MUMPS; 802 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 803 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 804 B->ops->destroy = MatDestroy_MUMPS; 805 806 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 807 if (size == 1) { 808 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 809 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 810 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 811 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 812 } else { 813 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 814 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 815 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 816 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 817 } 818 819 ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr); 820 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 821 *newmat = B; 822 PetscFunctionReturn(0); 823 } 824 EXTERN_C_END 825 826 /*MC 827 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 828 and sequential matrices via the external package MUMPS. 829 830 If MUMPS is installed (see the manual for instructions 831 on how to declare the existence of external packages), 832 a matrix type can be constructed which invokes MUMPS solvers. 833 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 834 835 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 836 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 837 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 838 for communicators controlling multiple processes. It is recommended that you call both of 839 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 840 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 841 without data copy. 842 843 Options Database Keys: 844 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 845 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 846 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 847 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 848 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 849 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 850 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 851 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 852 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 853 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 854 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 855 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 856 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 857 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 858 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 859 860 Level: beginner 861 862 .seealso: MATSBAIJMUMPS 863 M*/ 864 865 EXTERN_C_BEGIN 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatCreate_AIJMUMPS" 868 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 869 { 870 PetscErrorCode ierr; 871 PetscMPIInt size; 872 MPI_Comm comm; 873 874 PetscFunctionBegin; 875 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 876 /* and AIJMUMPS types */ 877 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 878 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 879 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 880 if (size == 1) { 881 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 882 } else { 883 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 884 /* 885 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 886 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 887 */ 888 } 889 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 EXTERN_C_END 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 896 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 897 { 898 PetscErrorCode ierr; 899 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 900 901 PetscFunctionBegin; 902 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 903 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 904 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 905 PetscFunctionReturn(0); 906 } 907 908 EXTERN_C_BEGIN 909 #undef __FUNCT__ 910 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 911 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 912 { 913 Mat A; 914 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 915 PetscErrorCode ierr; 916 917 PetscFunctionBegin; 918 /* 919 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 920 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 921 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 922 block size info so that PETSc can determine the local size properly. The block size info is set 923 in the preallocation routine. 924 */ 925 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 926 A = ((Mat_MPISBAIJ *)B->data)->A; 927 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 EXTERN_C_END 931 932 EXTERN_C_BEGIN 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 935 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 936 { 937 PetscErrorCode ierr; 938 PetscMPIInt size; 939 MPI_Comm comm; 940 Mat B=*newmat; 941 Mat_MUMPS *mumps; 942 void (*f)(void); 943 944 PetscFunctionBegin; 945 if (reuse == MAT_INITIAL_MATRIX) { 946 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 947 } 948 949 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 950 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 951 952 mumps->MatDuplicate = A->ops->duplicate; 953 mumps->MatView = A->ops->view; 954 mumps->MatAssemblyEnd = A->ops->assemblyend; 955 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 956 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 957 mumps->MatDestroy = A->ops->destroy; 958 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 959 mumps->CleanUpMUMPS = PETSC_FALSE; 960 mumps->isAIJ = PETSC_FALSE; 961 962 B->spptr = (void*)mumps; 963 B->ops->duplicate = MatDuplicate_MUMPS; 964 B->ops->view = MatView_MUMPS; 965 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 966 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 967 B->ops->destroy = MatDestroy_MUMPS; 968 969 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 970 if (size == 1) { 971 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 972 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 973 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 974 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 975 } else { 976 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 977 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 978 if (f) { /* This case should always be true when this routine is called */ 979 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 980 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 981 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 982 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 983 } 984 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 985 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 986 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 987 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 988 } 989 990 ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr); 991 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 992 *newmat = B; 993 PetscFunctionReturn(0); 994 } 995 EXTERN_C_END 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatDuplicate_MUMPS" 999 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 1000 PetscErrorCode ierr; 1001 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 1002 1003 PetscFunctionBegin; 1004 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 1005 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 /*MC 1010 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 1011 distributed and sequential matrices via the external package MUMPS. 1012 1013 If MUMPS is installed (see the manual for instructions 1014 on how to declare the existence of external packages), 1015 a matrix type can be constructed which invokes MUMPS solvers. 1016 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 1017 1018 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 1019 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 1020 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 1021 for communicators controlling multiple processes. It is recommended that you call both of 1022 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 1023 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 1024 without data copy. 1025 1026 Options Database Keys: 1027 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 1028 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 1029 . -mat_mumps_icntl_4 <0,...,4> - print level 1030 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 1031 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 1032 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 1033 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 1034 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 1035 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 1036 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 1037 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 1038 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 1039 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 1040 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 1041 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 1042 1043 Level: beginner 1044 1045 .seealso: MATAIJMUMPS 1046 M*/ 1047 1048 EXTERN_C_BEGIN 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1051 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1052 { 1053 PetscErrorCode ierr; 1054 PetscMPIInt size; 1055 1056 PetscFunctionBegin; 1057 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 1058 /* and SBAIJMUMPS types */ 1059 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 1060 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1061 if (size == 1) { 1062 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1063 } else { 1064 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1065 } 1066 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1067 PetscFunctionReturn(0); 1068 } 1069 EXTERN_C_END 1070