1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the MUMPS_4.3.1 sparse solver 5 */ 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "zmumps_c.h" 14 #else 15 #include "dmumps_c.h" 16 #endif 17 EXTERN_C_END 18 #define JOB_INIT -1 19 #define JOB_END -2 20 /* macros s.t. indices match MUMPS documentation */ 21 #define ICNTL(I) icntl[(I)-1] 22 #define CNTL(I) cntl[(I)-1] 23 #define INFOG(I) infog[(I)-1] 24 #define INFO(I) info[(I)-1] 25 #define RINFOG(I) rinfog[(I)-1] 26 #define RINFO(I) rinfo[(I)-1] 27 28 typedef struct { 29 #if defined(PETSC_USE_COMPLEX) 30 ZMUMPS_STRUC_C id; 31 #else 32 DMUMPS_STRUC_C id; 33 #endif 34 MatStructure matstruc; 35 int myid,size,*irn,*jcn,sym; 36 PetscScalar *val; 37 MPI_Comm comm_mumps; 38 39 PetscTruth isAIJ,CleanUpMUMPS; 40 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 41 PetscErrorCode (*MatView)(Mat,PetscViewer); 42 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 43 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 44 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatDestroy)(Mat); 46 PetscErrorCode (*specialdestroy)(Mat); 47 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 48 } Mat_MUMPS; 49 50 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 51 EXTERN_C_BEGIN 52 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*); 53 EXTERN_C_END 54 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 55 /* 56 input: 57 A - matrix in mpiaij or mpisbaij (bs=1) format 58 shift - 0: C style output triple; 1: Fortran style output triple. 
59 valOnly - FALSE: spaces are allocated and values are set for the triple 60 TRUE: only the values in v array are updated 61 output: 62 nnz - dim of r, c, and v (number of local nonzero entries of A) 63 r, c, v - row and col index, matrix values (matrix triples) 64 */ 65 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 66 int *ai, *aj, *bi, *bj, rstart,nz, *garray; 67 PetscErrorCode ierr; 68 int i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol; 69 int *row,*col; 70 PetscScalar *av, *bv,*val; 71 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 72 73 PetscFunctionBegin; 74 if (mumps->isAIJ){ 75 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 76 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 77 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 78 nz = aa->nz + bb->nz; 79 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 80 garray = mat->garray; 81 av=aa->a; bv=bb->a; 82 83 } else { 84 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 85 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 86 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 87 if (A->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->bs); 88 nz = aa->nz + bb->nz; 89 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 90 garray = mat->garray; 91 av=aa->a; bv=bb->a; 92 } 93 94 if (!valOnly){ 95 ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr); 96 ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr); 97 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 98 *r = row; *c = col; *v = val; 99 } else { 100 row = *r; col = *c; val = *v; 101 } 102 *nnz = nz; 103 104 jj = 0; irow = rstart; 105 for ( i=0; i<m; i++ ) { 106 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 107 countA = ai[i+1] - ai[i]; 108 countB = bi[i+1] - bi[i]; 109 bjj = bj + bi[i]; 110 111 /* get jB, the starting local col index for the 2nd B-part */ 112 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 113 j=-1; 114 do 
{ 115 j++; 116 if (j == countB) break; 117 jcol = garray[bjj[j]]; 118 } while (jcol < colA_start); 119 jB = j; 120 121 /* B-part, smaller col index */ 122 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 123 for (j=0; j<jB; j++){ 124 jcol = garray[bjj[j]]; 125 if (!valOnly){ 126 row[jj] = irow + shift; col[jj] = jcol + shift; 127 128 } 129 val[jj++] = *bv++; 130 } 131 /* A-part */ 132 for (j=0; j<countA; j++){ 133 if (!valOnly){ 134 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 135 } 136 val[jj++] = *av++; 137 } 138 /* B-part, larger col index */ 139 for (j=jB; j<countB; j++){ 140 if (!valOnly){ 141 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 142 } 143 val[jj++] = *bv++; 144 } 145 irow++; 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 EXTERN_C_BEGIN 152 #undef __FUNCT__ 153 #define __FUNCT__ "MatConvert_MUMPS_Base" 154 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \ 155 { 156 PetscErrorCode ierr; 157 Mat B=*newmat; 158 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 159 void (*f)(void); 160 161 PetscFunctionBegin; 162 if (reuse == MAT_INITIAL_MATRIX) { 163 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 164 } 165 B->ops->duplicate = mumps->MatDuplicate; 166 B->ops->view = mumps->MatView; 167 B->ops->assemblyend = mumps->MatAssemblyEnd; 168 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 169 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 170 B->ops->destroy = mumps->MatDestroy; 171 172 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 173 if (f) { 174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(FCNVOID)mumps->MatPreallocate);CHKERRQ(ierr); 175 } 176 ierr = PetscFree(mumps);CHKERRQ(ierr); 177 178 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 179 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 186 187 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 188 *newmat = B; 189 PetscFunctionReturn(0); 190 } 191 EXTERN_C_END 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatDestroy_MUMPS" 195 PetscErrorCode MatDestroy_MUMPS(Mat A) 196 { 197 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 198 PetscErrorCode ierr; 199 int size=lu->size; 200 PetscErrorCode (*specialdestroy)(Mat); 201 PetscFunctionBegin; 202 if (lu->CleanUpMUMPS) { 203 /* Terminate instance, deallocate memories */ 204 lu->id.job=JOB_END; 205 #if defined(PETSC_USE_COMPLEX) 206 zmumps_c(&lu->id); 207 #else 208 dmumps_c(&lu->id); 209 #endif 210 if (lu->irn) { 211 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 212 } 213 if (lu->jcn) { 214 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 215 } 216 if (size>1 && lu->val) { 217 ierr = PetscFree(lu->val);CHKERRQ(ierr); 218 } 219 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 220 } 221 specialdestroy = lu->specialdestroy; 222 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 223 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 #undef __FUNCT__ 228 #define __FUNCT__ "MatDestroy_AIJMUMPS" 229 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 230 { 
231 PetscErrorCode ierr; 232 int size; 233 234 PetscFunctionBegin; 235 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 236 if (size==1) { 237 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 238 } else { 239 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 246 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 247 { 248 PetscErrorCode ierr; 249 int size; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 253 if (size==1) { 254 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 255 } else { 256 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatFactorInfo_MUMPS" 263 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 264 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 265 PetscErrorCode ierr; 266 267 PetscFunctionBegin; 268 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 269 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 270 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 271 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 272 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 273 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 274 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 275 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 276 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d 
\n",lu->id.ICNTL(10));CHKERRQ(ierr); 277 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 278 if (!lu->myid && lu->id.ICNTL(11)>0) { 279 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 280 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 281 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 282 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 283 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 284 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 285 286 } 287 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 288 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 289 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 290 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 292 293 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 294 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 295 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 296 297 /* infomation local to each processor */ 298 if (!lu->myid) ierr = 
PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 299 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 300 ierr = PetscSynchronizedFlush(A->comm); 301 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 302 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 303 ierr = PetscSynchronizedFlush(A->comm); 304 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 305 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 306 ierr = PetscSynchronizedFlush(A->comm); 307 308 if (!lu->myid){ /* information from the host */ 309 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 310 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 312 313 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 314 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 315 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 316 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 317 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 318 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 319 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 320 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 321 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 322 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 323 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 324 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 325 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 326 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 327 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 328 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 329 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all 
MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 330 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 331 } 332 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "MatView_AIJMUMPS" 338 PetscErrorCode MatView_AIJMUMPS(Mat A,PetscViewer viewer) { 339 PetscErrorCode ierr; 340 PetscTruth iascii; 341 PetscViewerFormat format; 342 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 343 344 PetscFunctionBegin; 345 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 346 347 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 348 if (iascii) { 349 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 350 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 351 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 352 } 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatSolve_AIJMUMPS" 359 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 360 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 361 PetscScalar *array; 362 Vec x_seq; 363 IS iden; 364 VecScatter scat; 365 PetscErrorCode ierr; 366 367 PetscFunctionBegin; 368 if (lu->size > 1){ 369 if (!lu->myid){ 370 ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr); 371 ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr); 372 } else { 373 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr); 374 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr); 375 } 376 ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr); 377 ierr = ISDestroy(iden);CHKERRQ(ierr); 378 379 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 380 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 381 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 382 } else { /* size == 1 */ 383 ierr = 
VecCopy(b,x);CHKERRQ(ierr); 384 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 385 } 386 if (!lu->myid) { /* define rhs on the host */ 387 #if defined(PETSC_USE_COMPLEX) 388 lu->id.rhs = (mumps_double_complex*)array; 389 #else 390 lu->id.rhs = array; 391 #endif 392 } 393 394 /* solve phase */ 395 lu->id.job=3; 396 #if defined(PETSC_USE_COMPLEX) 397 zmumps_c(&lu->id); 398 #else 399 dmumps_c(&lu->id); 400 #endif 401 if (lu->id.INFOG(1) < 0) { 402 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 403 } 404 405 /* convert mumps solution x_seq to petsc mpi x */ 406 if (lu->size > 1) { 407 if (!lu->myid){ 408 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 409 } 410 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 411 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 412 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 413 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 414 } else { 415 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 416 } 417 418 PetscFunctionReturn(0); 419 } 420 421 /* 422 input: 423 F: numeric factor 424 output: 425 nneg: total number of negative pivots 426 nzero: 0 427 npos: (global dimension of F) - nneg 428 */ 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 432 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 433 { 434 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 435 PetscErrorCode ierr; 436 int size; 437 438 PetscFunctionBegin; 439 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 440 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 441 if (size > 1 && lu->id.ICNTL(13) != 1){ 442 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 443 } 444 if (nneg){ 445 if (!lu->myid){ 446 *nneg = lu->id.INFOG(12); 447 } 448 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 449 } 450 if (nzero) *nzero = 0; 451 if (npos) *npos = F->M - (*nneg); 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 457 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 458 { 459 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 460 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 461 PetscErrorCode ierr; 462 PetscInt rnz,nnz,nz,i,M=A->M,*ai,*aj,icntl; 463 PetscTruth valOnly,flg; 464 Mat F_diag; 465 466 PetscFunctionBegin; 467 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 468 (*F)->ops->solve = MatSolve_AIJMUMPS; 469 470 /* Initialize a MUMPS instance */ 471 ierr = MPI_Comm_rank(A->comm, &lu->myid); 472 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 473 lua->myid = lu->myid; lua->size = lu->size; 474 lu->id.job = JOB_INIT; 475 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 476 lu->id.comm_fortran = lu->comm_mumps; 477 478 /* Set mumps options */ 479 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 480 lu->id.par=1; /* host participates factorizaton and solve */ 481 lu->id.sym=lu->sym; 482 if (lu->sym == 2){ 483 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 484 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 485 } 486 #if defined(PETSC_USE_COMPLEX) 487 zmumps_c(&lu->id); 488 #else 489 dmumps_c(&lu->id); 490 #endif 491 492 if (lu->size == 1){ 493 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 494 } else { 495 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 496 } 497 498 icntl=-1; 499 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 
4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 500 if ((flg && icntl > 0) || PetscLogPrintInfo) { 501 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 502 } else { /* no output */ 503 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 504 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 505 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 506 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 507 } 508 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 509 icntl=-1; 510 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 511 if (flg) { 512 if (icntl== 1){ 513 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 514 } else { 515 lu->id.ICNTL(7) = icntl; 516 } 517 } 518 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 
1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 519 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 520 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 521 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 522 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 523 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 524 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 525 526 /* 527 ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr); 528 if (flg){ 529 if (icntl >-1 && icntl <3 ){ 530 if (lu->myid==0) lu->id.ICNTL(16) = icntl; 531 } else { 532 SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl); 533 } 534 } 535 */ 536 537 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 538 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 539 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 540 PetscOptionsEnd(); 541 } 542 543 /* define matrix A */ 544 switch (lu->id.ICNTL(18)){ 545 case 
0: /* centralized assembled matrix input (size=1) */ 546 if (!lu->myid) { 547 if (lua->isAIJ){ 548 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 549 nz = aa->nz; 550 ai = aa->i; aj = aa->j; lu->val = aa->a; 551 } else { 552 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 553 nz = aa->nz; 554 ai = aa->i; aj = aa->j; lu->val = aa->a; 555 } 556 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 557 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr); 558 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr); 559 nz = 0; 560 for (i=0; i<M; i++){ 561 rnz = ai[i+1] - ai[i]; 562 while (rnz--) { /* Fortran row/col index! */ 563 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 564 } 565 } 566 } 567 } 568 break; 569 case 3: /* distributed assembled matrix input (size>1) */ 570 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 571 valOnly = PETSC_FALSE; 572 } else { 573 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 574 } 575 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 576 break; 577 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 578 } 579 580 /* analysis phase */ 581 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 582 lu->id.n = M; 583 switch (lu->id.ICNTL(18)){ 584 case 0: /* centralized assembled matrix input */ 585 if (!lu->myid) { 586 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 587 if (lu->id.ICNTL(6)>1){ 588 #if defined(PETSC_USE_COMPLEX) 589 lu->id.a = (mumps_double_complex*)lu->val; 590 #else 591 lu->id.a = lu->val; 592 #endif 593 } 594 } 595 break; 596 case 3: /* distributed assembled matrix input (size>1) */ 597 lu->id.nz_loc = nnz; 598 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 599 if (lu->id.ICNTL(6)>1) { 600 #if defined(PETSC_USE_COMPLEX) 601 lu->id.a_loc = (mumps_double_complex*)lu->val; 602 #else 603 lu->id.a_loc = lu->val; 604 #endif 605 } 606 break; 607 } 608 lu->id.job=1; 609 
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    /* INFOG(1) < 0 signals a global MUMPS error (same value on all ranks) */
    if (lu->id.INFOG(1) < 0) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
    }
  }

  /* numerical factorization phase: point MUMPS at the (possibly updated) values;
     ICNTL(18)==0 means centralized input (only rank 0 supplies id.a),
     otherwise distributed input (every rank supplies id.a_loc) */
  if(!lu->id.ICNTL(18)) {
    if (!lu->myid) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a = (mumps_double_complex*)lu->val;
#else
      lu->id.a = lu->val;
#endif
    }
  } else {
#if defined(PETSC_USE_COMPLEX)
    lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
    lu->id.a_loc = lu->val;
#endif
  }
  lu->id.job=2;  /* JOB=2: MUMPS numerical factorization */
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    /* INFO(1)==-13: MUMPS could not allocate workspace; INFO(2) is the size in MB */
    if (lu->id.INFO(1) == -13) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
    } else {
      SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
    }
  }

  /* NOTE(review): the condition tests ICNTL(16) but the message prints INFOG(16);
     one of the two looks like a typo -- confirm against the MUMPS Users' Guide
     for this MUMPS version before changing */
  if (!lu->myid && lu->id.ICNTL(16) > 0){
    SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
  }

  /* mark the factor matrix (and its local diagonal block) assembled so PETSc
     will accept it in subsequent solves */
  if ((*F)->factor == FACTOR_LU){
    F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
  } else {
    F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
  }
  F_diag->assembled = PETSC_TRUE;
  (*F)->assembled = PETSC_TRUE;
  lu->matstruc = SAME_NONZERO_PATTERN;  /* later refactorizations reuse irn/jcn */
  lu->CleanUpMUMPS = PETSC_TRUE;        /* destroy must now release MUMPS state */
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored: MUMPS computes its own ordering */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
/*
   MatLUFactorSymbolic_AIJMUMPS - creates the factor matrix F for an LU
   factorization with MUMPS.  The actual symbolic analysis is deferred to the
   numeric routine (MatFactorNumeric_AIJMUMPS), which runs MUMPS JOB=1 and JOB=2.
*/
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
  Mat B;
  Mat_MUMPS *lu;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  /* Create the factorization matrix; MatSetType triggers MatConvert_AIJ_AIJMUMPS,
     which allocates B->spptr.  Only the preallocation call matching B's actual
     type (Seq or MPI) takes effect; the other is a no-op. */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
  B->factor = FACTOR_LU;
  lu = (Mat_MUMPS*)B->spptr;
  lu->sym = 0;                          /* MUMPS SYM=0: unsymmetric factorization */
  lu->matstruc = DIFFERENT_NONZERO_PATTERN;

  *F = B;
  PetscFunctionReturn(0);
}

/* Note the Petsc r permutation is ignored: MUMPS computes its own ordering */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
/*
   MatCholeskyFactorSymbolic_SBAIJMUMPS - creates the factor matrix F for a
   Cholesky factorization with MUMPS; shares the numeric routine with the LU path.
*/
PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
  Mat B;
  Mat_MUMPS *lu;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix (bs=1 only; see MatConvertToTriples) */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;  /* shared numeric phase */
  B->ops->getinertia = MatGetInertia_SBAIJMUMPS;
  B->factor = FACTOR_CHOLESKY;
  lu = (Mat_MUMPS*)B->spptr;
  lu->sym = 2;                          /* MUMPS SYM=2: general symmetric */
  lu->matstruc = DIFFERENT_NONZERO_PATTERN;

  *F = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
/*
   MatAssemblyEnd_AIJMUMPS - runs the underlying AIJ assembly, then re-installs
   the MUMPS LU symbolic entry point (the base assembly may have reset the ops table).
*/
PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
  PetscErrorCode ierr;
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);

  /* stash the base type's symbolic routines, then hook in the MUMPS LU one */
  mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic;
  mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
  A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 737 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 738 { 739 PetscErrorCode ierr; 740 PetscMPIInt size; 741 MPI_Comm comm; 742 Mat B=*newmat; 743 Mat_MUMPS *mumps; 744 745 PetscFunctionBegin; 746 if (reuse == MAT_INITIAL_MATRIX) { 747 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 748 } 749 750 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 751 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 752 753 mumps->MatDuplicate = A->ops->duplicate; 754 mumps->MatView = A->ops->view; 755 mumps->MatAssemblyEnd = A->ops->assemblyend; 756 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 757 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 758 mumps->MatDestroy = A->ops->destroy; 759 mumps->specialdestroy = MatDestroy_AIJMUMPS; 760 mumps->CleanUpMUMPS = PETSC_FALSE; 761 mumps->isAIJ = PETSC_TRUE; 762 763 B->spptr = (void*)mumps; 764 B->ops->duplicate = MatDuplicate_MUMPS; 765 B->ops->view = MatView_AIJMUMPS; 766 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 767 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 768 B->ops->destroy = MatDestroy_MUMPS; 769 770 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 771 if (size == 1) { 772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 773 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 774 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 775 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 776 } else { 777 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 778 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 779 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 780 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 
781 } 782 783 ierr = PetscLogInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for LU factorization and solves.\n"));CHKERRQ(ierr); 784 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 785 *newmat = B; 786 PetscFunctionReturn(0); 787 } 788 EXTERN_C_END 789 790 /*MC 791 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 792 and sequential matrices via the external package MUMPS. 793 794 If MUMPS is installed (see the manual for instructions 795 on how to declare the existence of external packages), 796 a matrix type can be constructed which invokes MUMPS solvers. 797 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 798 799 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 800 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 801 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 802 for communicators controlling multiple processes. It is recommended that you call both of 803 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 804 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 805 without data copy. 806 807 Options Database Keys: 808 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 809 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 810 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 811 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 812 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 813 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 814 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 815 . 
-mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 816 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 817 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 818 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 819 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 820 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 821 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 822 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 823 824 Level: beginner 825 826 .seealso: MATSBAIJMUMPS 827 M*/ 828 829 EXTERN_C_BEGIN 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatCreate_AIJMUMPS" 832 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 833 { 834 PetscErrorCode ierr; 835 int size; 836 Mat A_diag; 837 MPI_Comm comm; 838 839 PetscFunctionBegin; 840 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 841 /* and AIJMUMPS types */ 842 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 843 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 844 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 845 if (size == 1) { 846 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 847 } else { 848 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 849 A_diag = ((Mat_MPIAIJ *)A->data)->A; 850 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 851 } 852 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 EXTERN_C_END 856 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 859 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 860 { 861 PetscErrorCode ierr; 862 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 863 864 PetscFunctionBegin; 865 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 866 
mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 867 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 868 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 869 PetscFunctionReturn(0); 870 } 871 872 EXTERN_C_BEGIN 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 875 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 876 { 877 Mat A; 878 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 /* 883 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 884 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 885 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 886 block size info so that PETSc can determine the local size properly. The block size info is set 887 in the preallocation routine. 
888 */ 889 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 890 A = ((Mat_MPISBAIJ *)B->data)->A; 891 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 EXTERN_C_END 895 896 EXTERN_C_BEGIN 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 899 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 900 { 901 PetscErrorCode ierr; 902 PetscMPIInt size; 903 MPI_Comm comm; 904 Mat B=*newmat; 905 Mat_MUMPS *mumps; 906 void (*f)(void); 907 908 PetscFunctionBegin; 909 if (reuse == MAT_INITIAL_MATRIX) { 910 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 911 } 912 913 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 914 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 915 916 mumps->MatDuplicate = A->ops->duplicate; 917 mumps->MatView = A->ops->view; 918 mumps->MatAssemblyEnd = A->ops->assemblyend; 919 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 920 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 921 mumps->MatDestroy = A->ops->destroy; 922 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 923 mumps->CleanUpMUMPS = PETSC_FALSE; 924 mumps->isAIJ = PETSC_FALSE; 925 926 B->spptr = (void*)mumps; 927 B->ops->duplicate = MatDuplicate_MUMPS; 928 B->ops->view = MatView_AIJMUMPS; 929 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 930 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 931 B->ops->destroy = MatDestroy_MUMPS; 932 933 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 934 if (size == 1) { 935 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 936 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 937 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 938 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 939 } 
else { 940 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 941 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 942 if (f) { /* This case should always be true when this routine is called */ 943 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 944 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 945 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 946 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 947 } 948 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 949 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 950 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 951 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 952 } 953 954 ierr = PetscLogInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for Cholesky factorization and solves.\n"));CHKERRQ(ierr); 955 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 956 *newmat = B; 957 PetscFunctionReturn(0); 958 } 959 EXTERN_C_END 960 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatDuplicate_MUMPS" 963 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 964 PetscErrorCode ierr; 965 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 966 967 PetscFunctionBegin; 968 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 969 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 /*MC 974 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 975 distributed and sequential matrices via the external package MUMPS. 976 977 If MUMPS is installed (see the manual for instructions 978 on how to declare the existence of external packages), 979 a matrix type can be constructed which invokes MUMPS solvers. 
980 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 981 982 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 983 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 984 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 985 for communicators controlling multiple processes. It is recommended that you call both of 986 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 987 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 988 without data copy. 989 990 Options Database Keys: 991 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 992 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 993 . -mat_mumps_icntl_4 <0,...,4> - print level 994 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 995 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 996 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 997 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 998 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 999 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 1000 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 1001 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 1002 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 1003 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 1004 . 
-mat_mumps_cntl_2 <tol> - stopping criterion for refinement 1005 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 1006 1007 Level: beginner 1008 1009 .seealso: MATAIJMUMPS 1010 M*/ 1011 1012 EXTERN_C_BEGIN 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1015 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1016 { 1017 PetscErrorCode ierr; 1018 int size; 1019 1020 PetscFunctionBegin; 1021 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 1022 /* and SBAIJMUMPS types */ 1023 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 1024 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1025 if (size == 1) { 1026 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1027 } else { 1028 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1029 } 1030 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1031 PetscFunctionReturn(0); 1032 } 1033 EXTERN_C_END 1034