1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/sbaij/seq/sbaij.h> 7 #include <../src/mat/impls/dense/seq/dense.h> 8 #include <petscblaslapack.h> 9 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <math.h> 13 #include <mkl_pardiso.h> 14 15 PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int); 16 17 /* 18 * Possible mkl_pardiso phases that controls the execution of the solver. 19 * For more information check mkl_pardiso manual. 20 */ 21 #define JOB_ANALYSIS 11 22 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 23 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 24 #define JOB_NUMERICAL_FACTORIZATION 22 25 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 26 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 27 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 28 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 29 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 30 #define JOB_RELEASE_OF_LU_MEMORY 0 31 #define JOB_RELEASE_OF_ALL_MEMORY -1 32 33 #define IPARM_SIZE 64 34 35 #if defined(PETSC_USE_64BIT_INDICES) 36 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 37 /* sizeof(MKL_INT) == sizeof(long long int) if ilp64*/ 38 #define INT_TYPE long long int 39 #define MKL_PARDISO pardiso 40 #define MKL_PARDISO_INIT pardisoinit 41 #else 42 #define INT_TYPE long long int 43 #define MKL_PARDISO pardiso_64 44 #define MKL_PARDISO_INIT pardiso_64init 45 #endif 46 #else 47 #define INT_TYPE int 48 #define MKL_PARDISO pardiso 49 #define MKL_PARDISO_INIT pardisoinit 50 #endif 51 52 53 /* 54 * Internal data structure. 55 * For more information check mkl_pardiso manual. 56 */ 57 typedef struct { 58 59 /* Configuration vector*/ 60 INT_TYPE iparm[IPARM_SIZE]; 61 62 /* 63 * Internal mkl_pardiso memory location. 64 * After the first call to mkl_pardiso do not modify pt, as that could cause a serious memory leak. 65 */ 66 void *pt[IPARM_SIZE]; 67 68 /* Basic mkl_pardiso info*/ 69 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 70 71 /* Matrix structure*/ 72 void *a; 73 INT_TYPE *ia, *ja; 74 75 /* Number of non-zero elements*/ 76 INT_TYPE nz; 77 78 /* Row permutaton vector*/ 79 INT_TYPE *perm; 80 81 /* Define if matrix preserves sparse structure.*/ 82 MatStructure matstruc; 83 84 PetscBool needsym; 85 PetscBool freeaij; 86 87 /* Schur complement */ 88 PetscScalar *schur; 89 PetscInt schur_size; 90 PetscInt *schur_idxs; 91 PetscScalar *schur_work; 92 PetscBLASInt schur_work_size; 93 PetscInt schur_solver_type; 94 PetscInt *schur_pivots; 95 PetscBool schur_factored; 96 PetscBool schur_inverted; 97 PetscBool solve_interior; 98 99 /* True if mkl_pardiso function have been used.*/ 100 PetscBool CleanUp; 101 102 /* Conversion to a format suitable for MKL */ 103 PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool*, INT_TYPE*, INT_TYPE**, INT_TYPE**, PetscScalar**); 104 } Mat_MKL_PARDISO; 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatMKLPardiso_Convert_seqsbaij" 108 PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 109 { 110 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 111 PetscInt bs = A->rmap->bs; 112 113 PetscFunctionBegin; 114 if (!sym) { 115 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"This should not happen"); 116 } 117 if (bs == 1) { /* already in the correct format */ 118 *v = aa->a; 119 *r = aa->i; 120 *c = aa->j; 121 *nnz = aa->nz; 122 *free = PETSC_FALSE; 123 } else { 124 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqSBAIJ to MKL Pardiso format still need to be implemented"); 125 } 126 PetscFunctionReturn(0); 127 } 128 129 #undef __FUNCT__ 130 #define __FUNCT__ "MatMKLPardiso_Convert_seqbaij" 131 PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 132 { 133 PetscFunctionBegin; 134 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqBAIJ to MKL Pardiso format still need to be implemented"); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatMKLPardiso_Convert_seqaij" 140 PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 141 { 142 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 143 PetscErrorCode ierr; 144 145 PetscFunctionBegin; 146 if (!sym) { /* already in the correct format */ 147 *v = aa->a; 148 *r = aa->i; 149 *c = aa->j; 150 *nnz = aa->nz; 151 *free = PETSC_FALSE; 152 PetscFunctionReturn(0); 153 } 154 /* need to get the triangular part */ 155 if (reuse == MAT_INITIAL_MATRIX) { 156 PetscScalar *vals,*vv; 157 PetscInt *row,*col,*jj; 158 PetscInt m = A->rmap->n,nz,i; 159 160 nz = 0; 161 for (i=0; i<m; i++) { 162 nz += aa->i[i+1] - aa->diag[i]; 163 } 164 ierr = PetscMalloc2(m+1,&row,nz,&col);CHKERRQ(ierr); 165 ierr = PetscMalloc1(nz,&vals);CHKERRQ(ierr); 166 jj = col; 167 vv = vals; 168 169 row[0] = 0; 170 for (i=0; i<m; i++) { 171 PetscInt *aj = aa->j + aa->diag[i]; 172 PetscScalar *av = aa->a + aa->diag[i]; 173 PetscInt rl = aa->i[i+1] - aa->diag[i],j; 174 for (j=0; j<rl; j++) { 175 *jj = *aj; jj++; aj++; 176 *vv = *av; vv++; av++; 177 } 178 row[i+1] = row[i] + rl; 179 } 180 *v = vals; 181 *r = row; 182 *c = col; 183 *nnz = nz; 184 *free = PETSC_TRUE; 185 } else { 186 PetscScalar *vv; 187 PetscInt m = A->rmap->n,i; 188 189 vv = *v; 190 for (i=0; i<m; i++) { 191 PetscScalar *av = aa->a + aa->diag[i]; 192 PetscInt rl = aa->i[i+1] - aa->diag[i],j; 193 for (j=0; j<rl; j++) { 194 *vv = *av; vv++; av++; 195 } 196 } 197 *free = PETSC_TRUE; 198 } 199 PetscFunctionReturn(0); 200 } 201 202 void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm []) 203 { 204 int iparm_copy[IPARM_SIZE], mtype_copy, i; 205 206 mtype_copy = *mtype; 207 pardisoinit(pt, &mtype_copy, iparm_copy); 208 for(i = 0; i < IPARM_SIZE; i++){ 209 iparm[i] = iparm_copy[i]; 210 } 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMKLPardisoFactorSchur_Private" 215 static PetscErrorCode MatMKLPardisoFactorSchur_Private(Mat_MKL_PARDISO* mpardiso) 216 { 217 PetscBLASInt B_N,B_ierr; 218 PetscScalar *work,val; 219 PetscBLASInt lwork = -1; 220 PetscErrorCode ierr; 221 222 PetscFunctionBegin; 223 if (mpardiso->schur_factored) { 224 PetscFunctionReturn(0); 225 } 226 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 227 switch (mpardiso->schur_solver_type) { 228 case 1: /* hermitian solver */ 229 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 230 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,mpardiso->schur,&B_N,&B_ierr)); 231 ierr = PetscFPTrapPop();CHKERRQ(ierr); 232 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 233 break; 234 case 2: /* symmetric */ 235 if (!mpardiso->schur_pivots) { 236 ierr = PetscMalloc1(B_N,&mpardiso->schur_pivots);CHKERRQ(ierr); 237 } 238 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 239 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&val,&lwork,&B_ierr)); 240 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(val),&lwork);CHKERRQ(ierr); 241 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 242 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,work,&lwork,&B_ierr)); 243 ierr = PetscFree(work);CHKERRQ(ierr); 244 ierr = PetscFPTrapPop();CHKERRQ(ierr); 245 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 246 break; 247 default: /* general */ 248 if (!mpardiso->schur_pivots) { 249 ierr = PetscMalloc1(B_N,&mpardiso->schur_pivots);CHKERRQ(ierr); 250 } 251 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 252 PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&B_ierr)); 253 ierr = PetscFPTrapPop();CHKERRQ(ierr); 254 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 255 break; 256 } 257 mpardiso->schur_factored = PETSC_TRUE; 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatMKLPardisoInvertSchur_Private" 263 static PetscErrorCode MatMKLPardisoInvertSchur_Private(Mat_MKL_PARDISO* mpardiso) 264 { 265 PetscBLASInt B_N,B_ierr; 266 PetscErrorCode ierr; 267 268 PetscFunctionBegin; 269 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 270 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 271 switch (mpardiso->schur_solver_type) { 272 case 1: /* hermitian solver */ 273 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 274 PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,mpardiso->schur,&B_N,&B_ierr)); 275 ierr = PetscFPTrapPop();CHKERRQ(ierr); 276 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 277 break; 278 case 2: /* symmetric */ 279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 280 PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&B_ierr)); 281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 282 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 283 break; 284 default: /* general */ 285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 286 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&mpardiso->schur_work_size,&B_ierr)); 287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 288 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 289 break; 290 } 291 mpardiso->schur_inverted = PETSC_TRUE; 292 PetscFunctionReturn(0); 293 } 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatMKLPardisoSolveSchur_Private" 297 static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat_MKL_PARDISO* mpardiso, PetscScalar *B, PetscScalar *X) 298 { 299 PetscScalar one=1.,zero=0.,*schur_rhs,*schur_sol; 300 PetscBLASInt B_N,B_Nrhs,B_ierr; 301 char type[2]; 302 PetscErrorCode ierr; 303 304 PetscFunctionBegin; 305 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 306 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 307 ierr = PetscBLASIntCast(mpardiso->nrhs,&B_Nrhs);CHKERRQ(ierr); 308 if (X == B && mpardiso->schur_inverted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"X and B cannot point to the same address"); 309 if (X != B) { /* using LAPACK *TRS subroutines */ 310 ierr = PetscMemcpy(X,B,B_N*B_Nrhs*sizeof(PetscScalar));CHKERRQ(ierr); 311 } 312 schur_rhs = B; 313 schur_sol = X; 314 switch (mpardiso->schur_solver_type) { 315 case 1: /* hermitian solver */ 316 if (mpardiso->schur_inverted) { /* BLAShemm should go here */ 317 PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 318 } else { 319 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 320 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,schur_sol,&B_N,&B_ierr)); 321 ierr = PetscFPTrapPop();CHKERRQ(ierr); 322 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRS Lapack routine %d",(int)B_ierr); 323 } 324 break; 325 case 2: /* symmetric solver */ 326 if (mpardiso->schur_inverted) { 327 PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 328 } else { 329 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 330 PetscStackCallBLAS("LAPACKsytrs",LAPACKsytrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr)); 331 ierr = PetscFPTrapPop();CHKERRQ(ierr); 332 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRS Lapack routine %d",(int)B_ierr); 333 } 334 break; 335 default: /* general */ 336 switch (mpardiso->iparm[12-1]) { 337 case 1: 338 sprintf(type,"C"); 339 break; 340 case 2: 341 sprintf(type,"T"); 342 break; 343 default: 344 sprintf(type,"N"); 345 break; 346 } 347 if (mpardiso->schur_inverted) { 348 PetscStackCallBLAS("BLASgemm",BLASgemm_(type,"N",&B_N,&B_Nrhs,&B_N,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 349 } else { 350 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 351 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_(type,&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr)); 352 ierr = PetscFPTrapPop();CHKERRQ(ierr); 353 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRS Lapack routine %d",(int)B_ierr); 354 } 355 break; 356 } 357 PetscFunctionReturn(0); 358 } 359 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatFactorSetSchurIS_MKL_PARDISO" 363 PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is) 364 { 365 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 366 const PetscInt *idxs; 367 PetscInt size,i; 368 PetscMPIInt csize; 369 PetscBool sorted; 370 PetscErrorCode ierr; 371 372 PetscFunctionBegin; 373 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&csize);CHKERRQ(ierr); 374 if (csize > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MKL_PARDISO parallel Schur complements not yet supported from PETSc\n"); 375 ierr = ISSorted(is,&sorted);CHKERRQ(ierr); 376 if (!sorted) { 377 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS for MKL_PARDISO Schur complements needs to be sorted\n"); 378 } 379 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 380 if (mpardiso->schur_size != size) { 381 mpardiso->schur_size = size; 382 ierr = PetscFree2(mpardiso->schur,mpardiso->schur_work);CHKERRQ(ierr); 383 ierr = PetscFree(mpardiso->schur_idxs);CHKERRQ(ierr); 384 ierr = PetscFree(mpardiso->schur_pivots);CHKERRQ(ierr); 385 ierr = PetscBLASIntCast(PetscMax(mpardiso->n,2*size),&mpardiso->schur_work_size);CHKERRQ(ierr); 386 ierr = PetscMalloc2(size*size,&mpardiso->schur,mpardiso->schur_work_size,&mpardiso->schur_work);CHKERRQ(ierr); 387 ierr = PetscMalloc1(size,&mpardiso->schur_idxs);CHKERRQ(ierr); 388 } 389 ierr = PetscMemzero(mpardiso->perm,mpardiso->n*sizeof(INT_TYPE));CHKERRQ(ierr); 390 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 391 ierr = PetscMemcpy(mpardiso->schur_idxs,idxs,size*sizeof(PetscInt));CHKERRQ(ierr); 392 for (i=0;i<size;i++) mpardiso->perm[idxs[i]] = 1; 393 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 394 if (size) { /* turn on Schur switch if we the set of indices is not empty */ 395 mpardiso->iparm[36-1] = 2; 396 } 397 mpardiso->schur_factored = PETSC_FALSE; 398 mpardiso->schur_inverted = PETSC_FALSE; 399 PetscFunctionReturn(0); 400 } 401 402 #undef __FUNCT__ 403 #define __FUNCT__ "MatFactorCreateSchurComplement_MKL_PARDISO" 404 PetscErrorCode MatFactorCreateSchurComplement_MKL_PARDISO(Mat F,Mat* S) 405 { 406 Mat St; 407 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 408 PetscScalar *array; 409 PetscErrorCode ierr; 410 411 PetscFunctionBegin; 412 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 413 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 414 415 ierr = MatCreate(PetscObjectComm((PetscObject)F),&St);CHKERRQ(ierr); 416 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mpardiso->schur_size,mpardiso->schur_size);CHKERRQ(ierr); 417 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 418 ierr = MatSetUp(St);CHKERRQ(ierr); 419 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 420 ierr = PetscMemcpy(array,mpardiso->schur,mpardiso->schur_size*mpardiso->schur_size*sizeof(PetscScalar));CHKERRQ(ierr); 421 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 422 *S = St; 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatFactorGetSchurComplement_MKL_PARDISO" 428 PetscErrorCode MatFactorGetSchurComplement_MKL_PARDISO(Mat F,Mat* S) 429 { 430 Mat St; 431 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 436 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 437 438 ierr = MatCreateSeqDense(PetscObjectComm((PetscObject)F),mpardiso->schur_size,mpardiso->schur_size,mpardiso->schur,&St);CHKERRQ(ierr); 439 *S = St; 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatFactorInvertSchurComplement_MKL_PARDISO" 445 PetscErrorCode MatFactorInvertSchurComplement_MKL_PARDISO(Mat F) 446 { 447 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 if (!mpardiso->iparm[36-1]) { /* do nothing */ 452 PetscFunctionReturn(0); 453 } 454 if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 455 ierr = MatMKLPardisoInvertSchur_Private(mpardiso);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "MatFactorSolveSchurComplement_MKL_PARDISO" 461 PetscErrorCode MatFactorSolveSchurComplement_MKL_PARDISO(Mat F, Vec rhs, Vec sol) 462 { 463 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 464 PetscScalar *asol,*arhs; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 469 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 470 471 mpardiso->nrhs = 1; 472 ierr = VecGetArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 473 ierr = VecGetArray(sol,&asol);CHKERRQ(ierr); 474 ierr = MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 476 ierr = VecRestoreArray(sol,&asol);CHKERRQ(ierr); 477 PetscFunctionReturn(0); 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatFactorSolveSchurComplementTranspose_MKL_PARDISO" 482 PetscErrorCode MatFactorSolveSchurComplementTranspose_MKL_PARDISO(Mat F, Vec rhs, Vec sol) 483 { 484 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 485 PetscScalar *asol,*arhs; 486 PetscInt oiparm12; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 491 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 492 493 mpardiso->nrhs = 1; 494 ierr = VecGetArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 495 ierr = VecGetArray(sol,&asol);CHKERRQ(ierr); 496 oiparm12 = mpardiso->iparm[12 - 1]; 497 mpardiso->iparm[12 - 1] = 2; 498 ierr = MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);CHKERRQ(ierr); 499 mpardiso->iparm[12 - 1] = oiparm12; 500 ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 501 ierr = VecRestoreArray(sol,&asol);CHKERRQ(ierr); 502 PetscFunctionReturn(0); 503 } 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "MatDestroy_MKL_PARDISO" 507 PetscErrorCode MatDestroy_MKL_PARDISO(Mat A) 508 { 509 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data; 510 PetscErrorCode ierr; 511 512 PetscFunctionBegin; 513 if (mat_mkl_pardiso->CleanUp) { 514 mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 515 516 MKL_PARDISO (mat_mkl_pardiso->pt, 517 &mat_mkl_pardiso->maxfct, 518 &mat_mkl_pardiso->mnum, 519 &mat_mkl_pardiso->mtype, 520 &mat_mkl_pardiso->phase, 521 &mat_mkl_pardiso->n, 522 NULL, 523 NULL, 524 NULL, 525 mat_mkl_pardiso->perm, 526 &mat_mkl_pardiso->nrhs, 527 mat_mkl_pardiso->iparm, 528 &mat_mkl_pardiso->msglvl, 529 NULL, 530 NULL, 531 &mat_mkl_pardiso->err); 532 } 533 ierr = PetscFree(mat_mkl_pardiso->perm);CHKERRQ(ierr); 534 ierr = PetscFree2(mat_mkl_pardiso->schur,mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 535 ierr = PetscFree(mat_mkl_pardiso->schur_idxs);CHKERRQ(ierr); 536 ierr = PetscFree(mat_mkl_pardiso->schur_pivots);CHKERRQ(ierr); 537 if (mat_mkl_pardiso->freeaij) { 538 ierr = PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);CHKERRQ(ierr); 539 ierr = PetscFree(mat_mkl_pardiso->a);CHKERRQ(ierr); 540 } 541 ierr = PetscFree(A->data);CHKERRQ(ierr); 542 543 /* clear composed functions */ 544 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 545 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 546 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 547 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSchurComplement_C",NULL);CHKERRQ(ierr); 548 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorInvertSchurComplement_C",NULL);CHKERRQ(ierr); 549 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplement_C",NULL);CHKERRQ(ierr); 550 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplementTranspose_C",NULL);CHKERRQ(ierr); 551 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_PardisoSetCntl_C",NULL);CHKERRQ(ierr); 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatMKLPardisoScatterSchur_Private" 557 static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce) 558 { 559 PetscFunctionBegin; 560 if (reduce) { /* data given for the whole matrix */ 561 PetscInt i,m=0,p=0; 562 for (i=0;i<mpardiso->nrhs;i++) { 563 PetscInt j; 564 for (j=0;j<mpardiso->schur_size;j++) { 565 schur[p+j] = whole[m+mpardiso->schur_idxs[j]]; 566 } 567 m += mpardiso->n; 568 p += mpardiso->schur_size; 569 } 570 } else { /* from Schur to whole */ 571 PetscInt i,m=0,p=0; 572 for (i=0;i<mpardiso->nrhs;i++) { 573 PetscInt j; 574 for (j=0;j<mpardiso->schur_size;j++) { 575 whole[m+mpardiso->schur_idxs[j]] = schur[p+j]; 576 } 577 m += mpardiso->n; 578 p += mpardiso->schur_size; 579 } 580 } 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatSolve_MKL_PARDISO" 586 PetscErrorCode MatSolve_MKL_PARDISO(Mat A,Vec b,Vec x) 587 { 588 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->data; 589 PetscErrorCode ierr; 590 PetscScalar *xarray; 591 const PetscScalar *barray; 592 593 PetscFunctionBegin; 594 mat_mkl_pardiso->nrhs = 1; 595 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 596 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 597 598 if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 599 else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 600 mat_mkl_pardiso->iparm[6-1] = 0; 601 602 MKL_PARDISO (mat_mkl_pardiso->pt, 603 &mat_mkl_pardiso->maxfct, 604 &mat_mkl_pardiso->mnum, 605 &mat_mkl_pardiso->mtype, 606 &mat_mkl_pardiso->phase, 607 &mat_mkl_pardiso->n, 608 mat_mkl_pardiso->a, 609 mat_mkl_pardiso->ia, 610 mat_mkl_pardiso->ja, 611 mat_mkl_pardiso->perm, 612 &mat_mkl_pardiso->nrhs, 613 mat_mkl_pardiso->iparm, 614 &mat_mkl_pardiso->msglvl, 615 (void*)barray, 616 (void*)xarray, 617 &mat_mkl_pardiso->err); 618 619 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 620 621 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 622 PetscInt shift = mat_mkl_pardiso->schur_size; 623 624 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 625 if (!mat_mkl_pardiso->schur_inverted) { 626 shift = 0; 627 } 628 629 if (!mat_mkl_pardiso->solve_interior) { 630 /* solve Schur complement */ 631 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);CHKERRQ(ierr); 632 ierr = MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);CHKERRQ(ierr); 633 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);CHKERRQ(ierr); 634 } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substitued to xarray[schur] will be neglected */ 635 PetscInt i; 636 for (i=0;i<mat_mkl_pardiso->schur_size;i++) { 637 xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.; 638 } 639 } 640 /* expansion phase */ 641 mat_mkl_pardiso->iparm[6-1] = 1; 642 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 643 MKL_PARDISO (mat_mkl_pardiso->pt, 644 &mat_mkl_pardiso->maxfct, 645 &mat_mkl_pardiso->mnum, 646 &mat_mkl_pardiso->mtype, 647 &mat_mkl_pardiso->phase, 648 &mat_mkl_pardiso->n, 649 mat_mkl_pardiso->a, 650 mat_mkl_pardiso->ia, 651 mat_mkl_pardiso->ja, 652 mat_mkl_pardiso->perm, 653 &mat_mkl_pardiso->nrhs, 654 mat_mkl_pardiso->iparm, 655 &mat_mkl_pardiso->msglvl, 656 (void*)xarray, 657 (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 658 &mat_mkl_pardiso->err); 659 660 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 661 mat_mkl_pardiso->iparm[6-1] = 0; 662 } 663 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 664 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 665 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 666 PetscFunctionReturn(0); 667 } 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "MatSolveTranspose_MKL_PARDISO" 671 PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A,Vec b,Vec x) 672 { 673 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data; 674 PetscInt oiparm12; 675 PetscErrorCode ierr; 676 677 PetscFunctionBegin; 678 oiparm12 = mat_mkl_pardiso->iparm[12 - 1]; 679 mat_mkl_pardiso->iparm[12 - 1] = 2; 680 ierr = MatSolve_MKL_PARDISO(A,b,x);CHKERRQ(ierr); 681 mat_mkl_pardiso->iparm[12 - 1] = oiparm12; 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatMatSolve_MKL_PARDISO" 687 PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A,Mat B,Mat X) 688 { 689 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->data; 690 PetscErrorCode ierr; 691 PetscScalar *barray, *xarray; 692 PetscBool flg; 693 694 PetscFunctionBegin; 695 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 696 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 697 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 698 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 699 700 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_pardiso->nrhs);CHKERRQ(ierr); 701 702 if(mat_mkl_pardiso->nrhs > 0){ 703 ierr = MatDenseGetArray(B,&barray); 704 ierr = MatDenseGetArray(X,&xarray); 705 706 if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 707 else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 708 mat_mkl_pardiso->iparm[6-1] = 0; 709 710 MKL_PARDISO (mat_mkl_pardiso->pt, 711 &mat_mkl_pardiso->maxfct, 712 &mat_mkl_pardiso->mnum, 713 &mat_mkl_pardiso->mtype, 714 &mat_mkl_pardiso->phase, 715 &mat_mkl_pardiso->n, 716 mat_mkl_pardiso->a, 717 mat_mkl_pardiso->ia, 718 mat_mkl_pardiso->ja, 719 mat_mkl_pardiso->perm, 720 &mat_mkl_pardiso->nrhs, 721 mat_mkl_pardiso->iparm, 722 &mat_mkl_pardiso->msglvl, 723 (void*)barray, 724 (void*)xarray, 725 &mat_mkl_pardiso->err); 726 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 727 728 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 729 PetscScalar *o_schur_work = NULL; 730 PetscInt shift = mat_mkl_pardiso->schur_size*mat_mkl_pardiso->nrhs,scale; 731 PetscInt mem = mat_mkl_pardiso->n*mat_mkl_pardiso->nrhs; 732 733 /* allocate extra memory if it is needed */ 734 scale = 1; 735 if (mat_mkl_pardiso->schur_inverted) { 736 scale = 2; 737 } 738 mem *= scale; 739 if (mem > mat_mkl_pardiso->schur_work_size) { 740 o_schur_work = mat_mkl_pardiso->schur_work; 741 ierr = PetscMalloc1(mem,&mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 742 } 743 744 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 745 if (!mat_mkl_pardiso->schur_inverted) shift = 0; 746 747 /* solve Schur complement */ 748 if (!mat_mkl_pardiso->solve_interior) { 749 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);CHKERRQ(ierr); 750 ierr = MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);CHKERRQ(ierr); 751 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);CHKERRQ(ierr); 752 } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substitued to xarray[schur,n] will be neglected */ 753 PetscInt i,n,m=0; 754 for (n=0;n<mat_mkl_pardiso->nrhs;n++) { 755 for (i=0;i<mat_mkl_pardiso->schur_size;i++) { 756 xarray[mat_mkl_pardiso->schur_idxs[i]+m] = 0.; 757 } 758 m += mat_mkl_pardiso->n; 759 } 760 } 761 /* expansion phase */ 762 mat_mkl_pardiso->iparm[6-1] = 1; 763 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 764 MKL_PARDISO (mat_mkl_pardiso->pt, 765 &mat_mkl_pardiso->maxfct, 766 &mat_mkl_pardiso->mnum, 767 &mat_mkl_pardiso->mtype, 768 &mat_mkl_pardiso->phase, 769 &mat_mkl_pardiso->n, 770 mat_mkl_pardiso->a, 771 mat_mkl_pardiso->ia, 772 mat_mkl_pardiso->ja, 773 mat_mkl_pardiso->perm, 774 &mat_mkl_pardiso->nrhs, 775 mat_mkl_pardiso->iparm, 776 &mat_mkl_pardiso->msglvl, 777 (void*)xarray, 778 (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 779 &mat_mkl_pardiso->err); 780 if (o_schur_work) { /* restore original schur_work (minimal size) */ 781 ierr = PetscFree(mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 782 mat_mkl_pardiso->schur_work = o_schur_work; 783 } 784 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 785 mat_mkl_pardiso->iparm[6-1] = 0; 786 } 787 } 788 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatFactorNumeric_MKL_PARDISO" 794 PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F,Mat A,const MatFactorInfo *info) 795 { 796 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(F)->data; 797 PetscErrorCode ierr; 798 799 PetscFunctionBegin; 800 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 801 ierr = (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_REUSE_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);CHKERRQ(ierr); 802 803 mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION; 804 MKL_PARDISO (mat_mkl_pardiso->pt, 805 &mat_mkl_pardiso->maxfct, 806 &mat_mkl_pardiso->mnum, 807 &mat_mkl_pardiso->mtype, 808 &mat_mkl_pardiso->phase, 809 &mat_mkl_pardiso->n, 810 mat_mkl_pardiso->a, 811 mat_mkl_pardiso->ia, 812 mat_mkl_pardiso->ja, 813 mat_mkl_pardiso->perm, 814 &mat_mkl_pardiso->nrhs, 815 mat_mkl_pardiso->iparm, 816 &mat_mkl_pardiso->msglvl, 817 NULL, 818 (void*)mat_mkl_pardiso->schur, 819 &mat_mkl_pardiso->err); 820 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 821 822 if (mat_mkl_pardiso->schur) { /* schur output from pardiso is in row major format */ 823 PetscInt j,k,n=mat_mkl_pardiso->schur_size; 824 if (!mat_mkl_pardiso->schur_solver_type) { 825 for (j=0; j<n; j++) { 826 for (k=0; k<j; k++) { 827 PetscScalar tmp = mat_mkl_pardiso->schur[j + k*n]; 828 mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n]; 829 mat_mkl_pardiso->schur[k + j*n] = tmp; 830 } 831 } 832 } else { /* we could use row-major in LAPACK routines (e.g. use 'U' instead of 'L'; instead, I prefer consistency between data structures and swap to column major */ 833 for (j=0; j<n; j++) { 834 for (k=0; k<j; k++) { 835 mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n]; 836 } 837 } 838 } 839 } 840 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 841 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 842 mat_mkl_pardiso->schur_factored = PETSC_FALSE; 843 mat_mkl_pardiso->schur_inverted = PETSC_FALSE; 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "PetscSetMKL_PARDISOFromOptions" 849 PetscErrorCode PetscSetMKL_PARDISOFromOptions(Mat F, Mat A) 850 { 851 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->data; 852 PetscErrorCode ierr; 853 PetscInt icntl,threads=1; 854 PetscBool flg; 855 856 PetscFunctionBegin; 857 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_PARDISO Options","Mat");CHKERRQ(ierr); 858 859 ierr = PetscOptionsInt("-mat_mkl_pardiso_65","Number of threads to use within PARDISO","None",threads,&threads,&flg);CHKERRQ(ierr); 860 if (flg) PetscSetMKL_PARDISOThreads((int)threads); 861 862 ierr = PetscOptionsInt("-mat_mkl_pardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_pardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 863 if (flg) mat_mkl_pardiso->maxfct = icntl; 864 865 ierr = PetscOptionsInt("-mat_mkl_pardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_pardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 866 if (flg) mat_mkl_pardiso->mnum = icntl; 867 868 ierr = PetscOptionsInt("-mat_mkl_pardiso_68","Message level information","None",mat_mkl_pardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 869 if (flg) mat_mkl_pardiso->msglvl = icntl; 870 871 ierr = PetscOptionsInt("-mat_mkl_pardiso_69","Defines the matrix type","None",mat_mkl_pardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 872 if(flg){ 873 void *pt[IPARM_SIZE]; 874 mat_mkl_pardiso->mtype = icntl; 875 MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 876 #if defined(PETSC_USE_REAL_SINGLE) 877 mat_mkl_pardiso->iparm[27] = 1; 878 #else 879 mat_mkl_pardiso->iparm[27] = 0; 880 #endif 881 mat_mkl_pardiso->iparm[34] = 1; /* use 0-based indexing */ 882 } 883 ierr = PetscOptionsInt("-mat_mkl_pardiso_1","Use default values","None",mat_mkl_pardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 884 885 if(flg && icntl != 0){ 886 ierr = PetscOptionsInt("-mat_mkl_pardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_pardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 887 if (flg) mat_mkl_pardiso->iparm[1] = icntl; 888 889 ierr = PetscOptionsInt("-mat_mkl_pardiso_4","Preconditioned CGS/CG","None",mat_mkl_pardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 890 if (flg) mat_mkl_pardiso->iparm[3] = icntl; 891 892 ierr = PetscOptionsInt("-mat_mkl_pardiso_5","User permutation","None",mat_mkl_pardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 893 if (flg) mat_mkl_pardiso->iparm[4] = icntl; 894 895 ierr = PetscOptionsInt("-mat_mkl_pardiso_6","Write solution on x","None",mat_mkl_pardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 896 if (flg) mat_mkl_pardiso->iparm[5] = icntl; 897 898 ierr = PetscOptionsInt("-mat_mkl_pardiso_8","Iterative refinement step","None",mat_mkl_pardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 899 if (flg) mat_mkl_pardiso->iparm[7] = icntl; 900 901 ierr = PetscOptionsInt("-mat_mkl_pardiso_10","Pivoting perturbation","None",mat_mkl_pardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 902 if (flg) mat_mkl_pardiso->iparm[9] = icntl; 903 904 ierr = PetscOptionsInt("-mat_mkl_pardiso_11","Scaling vectors","None",mat_mkl_pardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 905 if (flg) mat_mkl_pardiso->iparm[10] = icntl; 906 907 ierr = PetscOptionsInt("-mat_mkl_pardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_pardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 908 if (flg) mat_mkl_pardiso->iparm[11] = icntl; 909 910 ierr = PetscOptionsInt("-mat_mkl_pardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_pardiso->iparm[12],&icntl,&flg);CHKERRQ(ierr); 911 if (flg) mat_mkl_pardiso->iparm[12] = icntl; 912 913 ierr = PetscOptionsInt("-mat_mkl_pardiso_18","Numbers of non-zero elements","None",mat_mkl_pardiso->iparm[17],&icntl,&flg);CHKERRQ(ierr); 914 if (flg) mat_mkl_pardiso->iparm[17] = icntl; 915 916 ierr = PetscOptionsInt("-mat_mkl_pardiso_19","Report number of floating point operations","None",mat_mkl_pardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 917 if (flg) mat_mkl_pardiso->iparm[18] = icntl; 918 919 ierr = PetscOptionsInt("-mat_mkl_pardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_pardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 920 if (flg) mat_mkl_pardiso->iparm[20] = icntl; 921 922 ierr = PetscOptionsInt("-mat_mkl_pardiso_24","Parallel factorization control","None",mat_mkl_pardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 923 if (flg) mat_mkl_pardiso->iparm[23] = icntl; 924 925 ierr = PetscOptionsInt("-mat_mkl_pardiso_25","Parallel forward/backward solve control","None",mat_mkl_pardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 926 if (flg) mat_mkl_pardiso->iparm[24] = icntl; 927 928 ierr = PetscOptionsInt("-mat_mkl_pardiso_27","Matrix checker","None",mat_mkl_pardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 929 if (flg) mat_mkl_pardiso->iparm[26] = icntl; 930 931 ierr = PetscOptionsInt("-mat_mkl_pardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_pardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 932 if (flg) mat_mkl_pardiso->iparm[30] = icntl; 933 934 ierr = PetscOptionsInt("-mat_mkl_pardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_pardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 935 if (flg) mat_mkl_pardiso->iparm[33] = icntl; 936 937 ierr = PetscOptionsInt("-mat_mkl_pardiso_60","Intel MKL_PARDISO mode","None",mat_mkl_pardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 938 if (flg) mat_mkl_pardiso->iparm[59] = icntl; 939 } 940 PetscOptionsEnd(); 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatFactorMKL_PARDISOInitialize_Private" 946 PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso) 947 { 948 PetscErrorCode ierr; 949 PetscInt i; 950 951 PetscFunctionBegin; 952 for ( i = 0; i < IPARM_SIZE; i++ ){ 953 mat_mkl_pardiso->iparm[i] = 0; 954 } 955 for ( i = 0; i < IPARM_SIZE; i++ ){ 956 mat_mkl_pardiso->pt[i] = 0; 957 } 958 /* Default options for both sym and unsym */ 959 mat_mkl_pardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 960 mat_mkl_pardiso->iparm[ 1] = 2; /* Metis reordering */ 961 mat_mkl_pardiso->iparm[ 5] = 0; /* Write solution into x */ 962 mat_mkl_pardiso->iparm[ 7] = 0; /* Max number of iterative refinement steps */ 963 mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 964 mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 965 #if 0 966 mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control*/ 967 #endif 968 mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */ 969 mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on master */ 970 971 mat_mkl_pardiso->CleanUp = PETSC_FALSE; 972 mat_mkl_pardiso->maxfct = 1; /* Maximum number of numerical factorizations. */ 973 mat_mkl_pardiso->mnum = 1; /* Which factorization to use. */ 974 mat_mkl_pardiso->msglvl = 0; /* 0: do not print 1: Print statistical information in file */ 975 mat_mkl_pardiso->phase = -1; 976 mat_mkl_pardiso->err = 0; 977 978 mat_mkl_pardiso->n = A->rmap->N; 979 mat_mkl_pardiso->nrhs = 1; 980 mat_mkl_pardiso->err = 0; 981 mat_mkl_pardiso->phase = -1; 982 983 if(ftype == MAT_FACTOR_LU){ 984 mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 985 mat_mkl_pardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 986 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 987 988 } else { 989 mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 990 mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */ 991 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 992 /* mat_mkl_pardiso->iparm[20] = 1; */ /* Apply 1x1 and 2x2 Bunch-Kaufman pivoting during the factorization process */ 993 #if defined(PETSC_USE_DEBUG) 994 mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */ 995 #endif 996 } 997 ierr = PetscMalloc1(A->rmap->N*sizeof(INT_TYPE), &mat_mkl_pardiso->perm);CHKERRQ(ierr); 998 for(i = 0; i < A->rmap->N; i++){ 999 mat_mkl_pardiso->perm[i] = 0; 1000 } 1001 mat_mkl_pardiso->schur_size = 0; 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatFactorSymbolic_AIJMKL_PARDISO_Private" 1007 PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F,Mat A,const MatFactorInfo *info) 1008 { 1009 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->data; 1010 PetscErrorCode ierr; 1011 1012 PetscFunctionBegin; 1013 mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 1014 ierr = PetscSetMKL_PARDISOFromOptions(F,A);CHKERRQ(ierr); 1015 1016 /* throw away any previously computed structure */ 1017 if (mat_mkl_pardiso->freeaij) { 1018 ierr = PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);CHKERRQ(ierr); 1019 ierr = PetscFree(mat_mkl_pardiso->a);CHKERRQ(ierr); 1020 } 1021 ierr = (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_INITIAL_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);CHKERRQ(ierr); 1022 mat_mkl_pardiso->n = A->rmap->N; 1023 1024 mat_mkl_pardiso->phase = JOB_ANALYSIS; 1025 1026 MKL_PARDISO (mat_mkl_pardiso->pt, 1027 &mat_mkl_pardiso->maxfct, 1028 &mat_mkl_pardiso->mnum, 1029 &mat_mkl_pardiso->mtype, 1030 &mat_mkl_pardiso->phase, 1031 &mat_mkl_pardiso->n, 1032 mat_mkl_pardiso->a, 1033 mat_mkl_pardiso->ia, 1034 mat_mkl_pardiso->ja, 1035 mat_mkl_pardiso->perm, 1036 &mat_mkl_pardiso->nrhs, 1037 mat_mkl_pardiso->iparm, 1038 &mat_mkl_pardiso->msglvl, 1039 NULL, 1040 NULL, 1041 &mat_mkl_pardiso->err); 1042 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d\n. Please check manual",mat_mkl_pardiso->err); 1043 1044 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 1045 1046 if (F->factortype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO; 1047 else F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO; 1048 1049 F->ops->solve = MatSolve_MKL_PARDISO; 1050 F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO; 1051 F->ops->matsolve = MatMatSolve_MKL_PARDISO; 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_PARDISO" 1057 PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1058 { 1059 PetscErrorCode ierr; 1060 1061 PetscFunctionBegin; 1062 ierr = MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #if !defined(PETSC_USE_COMPLEX) 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatGetInertia_MKL_PARDISO" 1069 PetscErrorCode MatGetInertia_MKL_PARDISO(Mat F,int *nneg,int *nzero,int *npos) 1070 { 1071 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)F->data; 1072 1073 PetscFunctionBegin; 1074 if (nneg) *nneg = mat_mkl_pardiso->iparm[22]; 1075 if (npos) *npos = mat_mkl_pardiso->iparm[21]; 1076 if (nzero) *nzero = F->rmap->N -(mat_mkl_pardiso->iparm[22] + mat_mkl_pardiso->iparm[21]); 1077 PetscFunctionReturn(0); 1078 } 1079 #endif 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatCholeskyFactorSymbolic_AIJMKL_PARDISO" 1083 PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,const MatFactorInfo *info) 1084 { 1085 PetscErrorCode ierr; 1086 1087 PetscFunctionBegin; 1088 ierr = MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);CHKERRQ(ierr); 1089 #if defined(PETSC_USE_COMPLEX) 1090 F->ops->getinertia = NULL; 1091 #else 1092 F->ops->getinertia = MatGetInertia_MKL_PARDISO; 1093 #endif 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatView_MKL_PARDISO" 1099 PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer) 1100 { 1101 PetscErrorCode ierr; 1102 PetscBool iascii; 1103 PetscViewerFormat format; 1104 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data; 1105 PetscInt i; 1106 1107 PetscFunctionBegin; 1108 if (A->ops->solve != MatSolve_MKL_PARDISO) PetscFunctionReturn(0); 1109 1110 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1111 if (iascii) { 1112 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1113 if (format == PETSC_VIEWER_ASCII_INFO) { 1114 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO run parameters:\n");CHKERRQ(ierr); 1115 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO phase: %d \n",mat_mkl_pardiso->phase);CHKERRQ(ierr); 1116 for(i = 1; i <= 64; i++){ 1117 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO iparm[%d]: %d \n",i, mat_mkl_pardiso->iparm[i - 1]);CHKERRQ(ierr); 1118 } 1119 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO maxfct: %d \n", mat_mkl_pardiso->maxfct);CHKERRQ(ierr); 1120 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mnum: %d \n", mat_mkl_pardiso->mnum);CHKERRQ(ierr); 1121 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mtype: %d \n", mat_mkl_pardiso->mtype);CHKERRQ(ierr); 1122 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO n: %d \n", mat_mkl_pardiso->n);CHKERRQ(ierr); 1123 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO nrhs: %d \n", mat_mkl_pardiso->nrhs);CHKERRQ(ierr); 1124 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO msglvl: %d \n", mat_mkl_pardiso->msglvl);CHKERRQ(ierr); 1125 } 1126 } 1127 PetscFunctionReturn(0); 1128 } 1129 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatGetInfo_MKL_PARDISO" 1133 PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info) 1134 { 1135 Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)A->data; 1136 1137 PetscFunctionBegin; 1138 info->block_size = 1.0; 1139 info->nz_used = mat_mkl_pardiso->nz; 1140 info->nz_allocated = mat_mkl_pardiso->nz; 1141 info->nz_unneeded = 0.0; 1142 info->assemblies = 0.0; 1143 info->mallocs = 0.0; 1144 info->memory = 0.0; 1145 info->fill_ratio_given = 0; 1146 info->fill_ratio_needed = 0; 1147 info->factor_mallocs = 0; 1148 PetscFunctionReturn(0); 1149 } 1150 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatMkl_PardisoSetCntl_MKL_PARDISO" 1153 PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F,PetscInt icntl,PetscInt ival) 1154 { 1155 Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)F->data; 1156 1157 PetscFunctionBegin; 1158 if(icntl <= 64){ 1159 mat_mkl_pardiso->iparm[icntl - 1] = ival; 1160 } else { 1161 if(icntl == 65) PetscSetMKL_PARDISOThreads(ival); 1162 else if(icntl == 66) mat_mkl_pardiso->maxfct = ival; 1163 else if(icntl == 67) mat_mkl_pardiso->mnum = ival; 1164 else if(icntl == 68) mat_mkl_pardiso->msglvl = ival; 1165 else if(icntl == 69){ 1166 void *pt[IPARM_SIZE]; 1167 mat_mkl_pardiso->mtype = ival; 1168 MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 1169 #if defined(PETSC_USE_REAL_SINGLE) 1170 mat_mkl_pardiso->iparm[27] = 1; 1171 #else 1172 mat_mkl_pardiso->iparm[27] = 0; 1173 #endif 1174 mat_mkl_pardiso->iparm[34] = 1; 1175 } else if(icntl==70) mat_mkl_pardiso->solve_interior = (PetscBool)!!ival; 1176 } 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatMkl_PardisoSetCntl" 1182 /*@ 1183 MatMkl_PardisoSetCntl - Set Mkl_Pardiso parameters 1184 1185 Logically Collective on Mat 1186 1187 Input Parameters: 1188 + F - the factored matrix obtained by calling MatGetFactor() 1189 . icntl - index of Mkl_Pardiso parameter 1190 - ival - value of Mkl_Pardiso parameter 1191 1192 Options Database: 1193 . -mat_mkl_pardiso_<icntl> <ival> 1194 1195 Level: beginner 1196 1197 References: 1198 . Mkl_Pardiso Users' Guide 1199 1200 .seealso: MatGetFactor() 1201 @*/ 1202 PetscErrorCode MatMkl_PardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 1203 { 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 ierr = PetscTryMethod(F,"MatMkl_PardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 /*MC 1212 MATSOLVERMKL_PARDISO - A matrix type providing direct solvers (LU) for 1213 sequential matrices via the external package MKL_PARDISO. 1214 1215 Works with MATSEQAIJ matrices 1216 1217 Use -pc_type lu -pc_factor_mat_solver_package mkl_pardiso to us this direct solver 1218 1219 Options Database Keys: 1220 + -mat_mkl_pardiso_65 - Number of threads to use within MKL_PARDISO 1221 . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time 1222 . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase 1223 . -mat_mkl_pardiso_68 - Message level information 1224 . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type 1225 . -mat_mkl_pardiso_1 - Use default values 1226 . -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix 1227 . -mat_mkl_pardiso_4 - Preconditioned CGS/CG 1228 . -mat_mkl_pardiso_5 - User permutation 1229 . -mat_mkl_pardiso_6 - Write solution on x 1230 . -mat_mkl_pardiso_8 - Iterative refinement step 1231 . -mat_mkl_pardiso_10 - Pivoting perturbation 1232 . -mat_mkl_pardiso_11 - Scaling vectors 1233 . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A 1234 . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching 1235 . -mat_mkl_pardiso_18 - Numbers of non-zero elements 1236 . -mat_mkl_pardiso_19 - Report number of floating point operations 1237 . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices 1238 . -mat_mkl_pardiso_24 - Parallel factorization control 1239 . -mat_mkl_pardiso_25 - Parallel forward/backward solve control 1240 . -mat_mkl_pardiso_27 - Matrix checker 1241 . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors 1242 . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode 1243 - -mat_mkl_pardiso_60 - Intel MKL_PARDISO mode 1244 1245 Level: beginner 1246 1247 For more information please check mkl_pardiso manual 1248 1249 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage 1250 1251 M*/ 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_pardiso" 1254 static PetscErrorCode MatFactorGetSolverPackage_mkl_pardiso(Mat A, const MatSolverPackage *type) 1255 { 1256 PetscFunctionBegin; 1257 *type = MATSOLVERMKL_PARDISO; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatGetFactor_aij_mkl_pardiso" 1263 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A,MatFactorType ftype,Mat *F) 1264 { 1265 Mat B; 1266 PetscErrorCode ierr; 1267 Mat_MKL_PARDISO *mat_mkl_pardiso; 1268 PetscBool isSeqAIJ,isSeqBAIJ,isSeqSBAIJ; 1269 1270 PetscFunctionBegin; 1271 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 1272 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 1273 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 1274 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1275 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1276 ierr = PetscStrallocpy("mkl_pardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 1277 ierr = MatSetUp(B);CHKERRQ(ierr); 1278 1279 ierr = PetscNewLog(B,&mat_mkl_pardiso);CHKERRQ(ierr); 1280 B->data = mat_mkl_pardiso; 1281 1282 ierr = MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso);CHKERRQ(ierr); 1283 if (ftype == MAT_FACTOR_LU) { 1284 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO; 1285 B->factortype = MAT_FACTOR_LU; 1286 mat_mkl_pardiso->needsym = PETSC_FALSE; 1287 if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1288 else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1289 else if (isSeqSBAIJ) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead"); 1290 else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU with %s format",((PetscObject)A)->type_name); 1291 #if defined(PETSC_USE_COMPLEX) 1292 mat_mkl_pardiso->mtype = 13; 1293 #else 1294 if (A->structurally_symmetric) mat_mkl_pardiso->mtype = 1; 1295 else mat_mkl_pardiso->mtype = 11; 1296 #endif 1297 mat_mkl_pardiso->schur_solver_type = 0; 1298 } else { 1299 #if defined(PETSC_USE_COMPLEX) 1300 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name); 1301 #endif 1302 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO; 1303 B->factortype = MAT_FACTOR_CHOLESKY; 1304 if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1305 else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1306 else if (isSeqSBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij; 1307 else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with %s format",((PetscObject)A)->type_name); 1308 1309 mat_mkl_pardiso->needsym = PETSC_TRUE; 1310 if (A->spd_set && A->spd) { 1311 mat_mkl_pardiso->schur_solver_type = 1; 1312 mat_mkl_pardiso->mtype = 2; 1313 } else { 1314 mat_mkl_pardiso->schur_solver_type = 2; 1315 mat_mkl_pardiso->mtype = -2; 1316 } 1317 } 1318 B->ops->destroy = MatDestroy_MKL_PARDISO; 1319 B->ops->view = MatView_MKL_PARDISO; 1320 B->factortype = ftype; 1321 B->ops->getinfo = MatGetInfo_MKL_PARDISO; 1322 B->assembled = PETSC_TRUE; 1323 1324 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 1325 ierr = PetscStrallocpy(MATSOLVERMKL_PARDISO,&B->solvertype);CHKERRQ(ierr); 1326 1327 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_pardiso);CHKERRQ(ierr); 1328 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MKL_PARDISO);CHKERRQ(ierr); 1329 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1330 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSchurComplement_C",MatFactorGetSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1331 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorInvertSchurComplement_C",MatFactorInvertSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1332 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplement_C",MatFactorSolveSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1333 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplementTranspose_C",MatFactorSolveSchurComplementTranspose_MKL_PARDISO);CHKERRQ(ierr); 1334 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_PardisoSetCntl_C",MatMkl_PardisoSetCntl_MKL_PARDISO);CHKERRQ(ierr); 1335 1336 *F = B; 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatSolverPackageRegister_MKL_Pardiso" 1342 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_Pardiso(void) 1343 { 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1348 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1349 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353