1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/sbaij/seq/sbaij.h> 7 #include <../src/mat/impls/dense/seq/dense.h> 8 #include <petscblaslapack.h> 9 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <math.h> 13 #include <mkl_pardiso.h> 14 15 PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int); 16 17 /* 18 * Possible mkl_pardiso phases that controls the execution of the solver. 19 * For more information check mkl_pardiso manual. 20 */ 21 #define JOB_ANALYSIS 11 22 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 23 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 24 #define JOB_NUMERICAL_FACTORIZATION 22 25 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 26 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 27 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 28 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 29 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 30 #define JOB_RELEASE_OF_LU_MEMORY 0 31 #define JOB_RELEASE_OF_ALL_MEMORY -1 32 33 #define IPARM_SIZE 64 34 35 #if defined(PETSC_USE_64BIT_INDICES) 36 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 37 /* sizeof(MKL_INT) == sizeof(long long int) if ilp64*/ 38 #define INT_TYPE long long int 39 #define MKL_PARDISO pardiso 40 #define MKL_PARDISO_INIT pardisoinit 41 #else 42 #define INT_TYPE long long int 43 #define MKL_PARDISO pardiso_64 44 #define MKL_PARDISO_INIT pardiso_64init 45 #endif 46 #else 47 #define INT_TYPE int 48 #define MKL_PARDISO pardiso 49 #define MKL_PARDISO_INIT pardisoinit 50 #endif 51 52 53 /* 54 * Internal data structure. 55 * For more information check mkl_pardiso manual. 56 */ 57 typedef struct { 58 59 /* Configuration vector*/ 60 INT_TYPE iparm[IPARM_SIZE]; 61 62 /* 63 * Internal mkl_pardiso memory location. 64 * After the first call to mkl_pardiso do not modify pt, as that could cause a serious memory leak. 65 */ 66 void *pt[IPARM_SIZE]; 67 68 /* Basic mkl_pardiso info*/ 69 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 70 71 /* Matrix structure*/ 72 void *a; 73 INT_TYPE *ia, *ja; 74 75 /* Number of non-zero elements*/ 76 INT_TYPE nz; 77 78 /* Row permutaton vector*/ 79 INT_TYPE *perm; 80 81 /* Define if matrix preserves sparse structure.*/ 82 MatStructure matstruc; 83 84 PetscBool needsym; 85 PetscBool freeaij; 86 87 /* Schur complement */ 88 PetscScalar *schur; 89 PetscInt schur_size; 90 PetscInt *schur_idxs; 91 PetscScalar *schur_work; 92 PetscBLASInt schur_work_size; 93 PetscInt schur_solver_type; 94 PetscInt *schur_pivots; 95 PetscBool schur_factored; 96 PetscBool schur_inverted; 97 PetscBool solve_interior; 98 99 /* True if mkl_pardiso function have been used.*/ 100 PetscBool CleanUp; 101 102 /* Conversion to a format suitable for MKL */ 103 PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool*, INT_TYPE*, INT_TYPE**, INT_TYPE**, PetscScalar**); 104 } Mat_MKL_PARDISO; 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatMKLPardiso_Convert_seqsbaij" 108 PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 109 { 110 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 111 PetscInt bs = A->rmap->bs; 112 113 PetscFunctionBegin; 114 if (!sym) { 115 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"This should not happen"); 116 } 117 if (bs == 1) { /* already in the correct format */ 118 *v = aa->a; 119 *r = aa->i; 120 *c = aa->j; 121 *nnz = aa->nz; 122 *free = PETSC_FALSE; 123 } else { 124 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqSBAIJ to MKL Pardiso format still need to be implemented"); 125 } 126 PetscFunctionReturn(0); 127 } 128 129 #undef __FUNCT__ 130 #define __FUNCT__ "MatMKLPardiso_Convert_seqbaij" 131 PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 132 { 133 PetscFunctionBegin; 134 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqBAIJ to MKL Pardiso format still need to be implemented"); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatMKLPardiso_Convert_seqaij" 140 PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 141 { 142 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 143 PetscErrorCode ierr; 144 145 PetscFunctionBegin; 146 if (!sym) { /* already in the correct format */ 147 *v = aa->a; 148 *r = aa->i; 149 *c = aa->j; 150 *nnz = aa->nz; 151 *free = PETSC_FALSE; 152 PetscFunctionReturn(0); 153 } 154 /* need to get the triangular part */ 155 if (reuse == MAT_INITIAL_MATRIX) { 156 PetscScalar *vals,*vv; 157 PetscInt *row,*col,*jj; 158 PetscInt m = A->rmap->n,nz,i; 159 160 nz = 0; 161 for (i=0; i<m; i++) { 162 nz += aa->i[i+1] - aa->diag[i]; 163 } 164 ierr = PetscMalloc2(m+1,&row,nz,&col);CHKERRQ(ierr); 165 ierr = PetscMalloc1(nz,&vals);CHKERRQ(ierr); 166 jj = col; 167 vv = vals; 168 169 row[0] = 0; 170 for (i=0; i<m; i++) { 171 PetscInt *aj = aa->j + aa->diag[i]; 172 PetscScalar *av = aa->a + aa->diag[i]; 173 PetscInt rl = aa->i[i+1] - aa->diag[i],j; 174 for (j=0; j<rl; j++) { 175 *jj = *aj; jj++; aj++; 176 *vv = *av; vv++; av++; 177 } 178 row[i+1] = row[i] + rl; 179 } 180 *v = vals; 181 *r = row; 182 *c = col; 183 *nnz = nz; 184 *free = PETSC_TRUE; 185 } else { 186 PetscScalar *vv; 187 PetscInt m = A->rmap->n,i; 188 189 vv = *v; 190 for (i=0; i<m; i++) { 191 PetscScalar *av = aa->a + aa->diag[i]; 192 PetscInt rl = aa->i[i+1] - aa->diag[i],j; 193 for (j=0; j<rl; j++) { 194 *vv = *av; vv++; av++; 195 } 196 } 197 *free = PETSC_TRUE; 198 } 199 PetscFunctionReturn(0); 200 } 201 202 void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm []) 203 { 204 int iparm_copy[IPARM_SIZE], mtype_copy, i; 205 206 mtype_copy = *mtype; 207 pardisoinit(pt, &mtype_copy, iparm_copy); 208 for(i = 0; i < IPARM_SIZE; i++){ 209 iparm[i] = iparm_copy[i]; 210 } 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMKLPardisoFactorSchur_Private" 215 static PetscErrorCode MatMKLPardisoFactorSchur_Private(Mat_MKL_PARDISO* mpardiso) 216 { 217 PetscBLASInt B_N,B_ierr; 218 PetscScalar *work,val; 219 PetscBLASInt lwork = -1; 220 PetscErrorCode ierr; 221 222 PetscFunctionBegin; 223 if (mpardiso->schur_factored) { 224 PetscFunctionReturn(0); 225 } 226 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 227 switch (mpardiso->schur_solver_type) { 228 case 1: /* hermitian solver */ 229 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 230 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,mpardiso->schur,&B_N,&B_ierr)); 231 ierr = PetscFPTrapPop();CHKERRQ(ierr); 232 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 233 break; 234 case 2: /* symmetric */ 235 if (!mpardiso->schur_pivots) { 236 ierr = PetscMalloc1(B_N,&mpardiso->schur_pivots);CHKERRQ(ierr); 237 } 238 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 239 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&val,&lwork,&B_ierr)); 240 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(val),&lwork);CHKERRQ(ierr); 241 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 242 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,work,&lwork,&B_ierr)); 243 ierr = PetscFree(work);CHKERRQ(ierr); 244 ierr = PetscFPTrapPop();CHKERRQ(ierr); 245 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 246 break; 247 default: /* general */ 248 if (!mpardiso->schur_pivots) { 249 ierr = PetscMalloc1(B_N,&mpardiso->schur_pivots);CHKERRQ(ierr); 250 } 251 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 252 PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&B_ierr)); 253 ierr = PetscFPTrapPop();CHKERRQ(ierr); 254 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 255 break; 256 } 257 mpardiso->schur_factored = PETSC_TRUE; 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatMKLPardisoInvertSchur_Private" 263 static PetscErrorCode MatMKLPardisoInvertSchur_Private(Mat_MKL_PARDISO* mpardiso) 264 { 265 PetscBLASInt B_N,B_ierr; 266 PetscErrorCode ierr; 267 268 PetscFunctionBegin; 269 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 270 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 271 switch (mpardiso->schur_solver_type) { 272 case 1: /* hermitian solver */ 273 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 274 PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,mpardiso->schur,&B_N,&B_ierr)); 275 ierr = PetscFPTrapPop();CHKERRQ(ierr); 276 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 277 break; 278 case 2: /* symmetric */ 279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 280 PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&B_ierr)); 281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 282 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 283 break; 284 default: /* general */ 285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 286 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&mpardiso->schur_work_size,&B_ierr)); 287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 288 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 289 break; 290 } 291 mpardiso->schur_inverted = PETSC_TRUE; 292 PetscFunctionReturn(0); 293 } 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatMKLPardisoSolveSchur_Private" 297 static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat_MKL_PARDISO* mpardiso, PetscScalar *B, PetscScalar *X) 298 { 299 PetscScalar one=1.,zero=0.,*schur_rhs,*schur_sol; 300 PetscBLASInt B_N,B_Nrhs,B_ierr; 301 char type[2]; 302 PetscErrorCode ierr; 303 304 PetscFunctionBegin; 305 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 306 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 307 ierr = PetscBLASIntCast(mpardiso->nrhs,&B_Nrhs);CHKERRQ(ierr); 308 if (X == B && mpardiso->schur_inverted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"X and B cannot point to the same address"); 309 if (X != B) { /* using LAPACK *TRS subroutines */ 310 ierr = PetscMemcpy(X,B,B_N*B_Nrhs*sizeof(PetscScalar));CHKERRQ(ierr); 311 } 312 schur_rhs = B; 313 schur_sol = X; 314 switch (mpardiso->schur_solver_type) { 315 case 1: /* hermitian solver */ 316 if (mpardiso->schur_inverted) { /* BLAShemm should go here */ 317 PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 318 } else { 319 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 320 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,schur_sol,&B_N,&B_ierr)); 321 ierr = PetscFPTrapPop();CHKERRQ(ierr); 322 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRS Lapack routine %d",(int)B_ierr); 323 } 324 break; 325 case 2: /* symmetric solver */ 326 if (mpardiso->schur_inverted) { 327 PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 328 } else { 329 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 330 PetscStackCallBLAS("LAPACKsytrs",LAPACKsytrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr)); 331 ierr = PetscFPTrapPop();CHKERRQ(ierr); 332 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRS Lapack routine %d",(int)B_ierr); 333 } 334 break; 335 default: /* general */ 336 switch (mpardiso->iparm[12-1]) { 337 case 1: 338 sprintf(type,"C"); 339 break; 340 case 2: 341 sprintf(type,"T"); 342 break; 343 default: 344 sprintf(type,"N"); 345 break; 346 } 347 if (mpardiso->schur_inverted) { 348 PetscStackCallBLAS("BLASgemm",BLASgemm_(type,"N",&B_N,&B_Nrhs,&B_N,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 349 } else { 350 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 351 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_(type,&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr)); 352 ierr = PetscFPTrapPop();CHKERRQ(ierr); 353 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRS Lapack routine %d",(int)B_ierr); 354 } 355 break; 356 } 357 PetscFunctionReturn(0); 358 } 359 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatFactorSetSchurIS_MKL_PARDISO" 363 PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is) 364 { 365 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 366 const PetscInt *idxs; 367 PetscInt size,i; 368 PetscMPIInt csize; 369 PetscBool sorted; 370 PetscErrorCode ierr; 371 372 PetscFunctionBegin; 373 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&csize);CHKERRQ(ierr); 374 if (csize > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MKL_PARDISO parallel Schur complements not yet supported from PETSc\n"); 375 ierr = ISSorted(is,&sorted);CHKERRQ(ierr); 376 if (!sorted) { 377 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS for MKL_PARDISO Schur complements needs to be sorted\n"); 378 } 379 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 380 if (mpardiso->schur_size != size) { 381 mpardiso->schur_size = size; 382 ierr = PetscFree2(mpardiso->schur,mpardiso->schur_work);CHKERRQ(ierr); 383 ierr = PetscFree(mpardiso->schur_idxs);CHKERRQ(ierr); 384 ierr = PetscFree(mpardiso->schur_pivots);CHKERRQ(ierr); 385 ierr = PetscBLASIntCast(PetscMax(mpardiso->n,2*size),&mpardiso->schur_work_size);CHKERRQ(ierr); 386 ierr = PetscMalloc2(size*size,&mpardiso->schur,mpardiso->schur_work_size,&mpardiso->schur_work);CHKERRQ(ierr); 387 ierr = PetscMalloc1(size,&mpardiso->schur_idxs);CHKERRQ(ierr); 388 } 389 ierr = PetscMemzero(mpardiso->perm,mpardiso->n*sizeof(INT_TYPE));CHKERRQ(ierr); 390 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 391 ierr = PetscMemcpy(mpardiso->schur_idxs,idxs,size*sizeof(PetscInt));CHKERRQ(ierr); 392 for (i=0;i<size;i++) mpardiso->perm[idxs[i]] = 1; 393 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 394 if (size) { /* turn on Schur switch if we the set of indices is not empty */ 395 mpardiso->iparm[36-1] = 2; 396 } 397 mpardiso->schur_factored = PETSC_FALSE; 398 mpardiso->schur_inverted = PETSC_FALSE; 399 PetscFunctionReturn(0); 400 } 401 402 #undef __FUNCT__ 403 #define __FUNCT__ "MatFactorCreateSchurComplement_MKL_PARDISO" 404 PetscErrorCode MatFactorCreateSchurComplement_MKL_PARDISO(Mat F,Mat* S) 405 { 406 Mat St; 407 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 408 PetscScalar *array; 409 PetscErrorCode ierr; 410 411 PetscFunctionBegin; 412 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 413 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 414 415 ierr = MatCreate(PetscObjectComm((PetscObject)F),&St);CHKERRQ(ierr); 416 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mpardiso->schur_size,mpardiso->schur_size);CHKERRQ(ierr); 417 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 418 ierr = MatSetUp(St);CHKERRQ(ierr); 419 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 420 ierr = PetscMemcpy(array,mpardiso->schur,mpardiso->schur_size*mpardiso->schur_size*sizeof(PetscScalar));CHKERRQ(ierr); 421 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 422 *S = St; 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatFactorGetSchurComplement_MKL_PARDISO" 428 PetscErrorCode MatFactorGetSchurComplement_MKL_PARDISO(Mat F,Mat* S) 429 { 430 Mat St; 431 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 436 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 437 438 ierr = MatCreateSeqDense(PetscObjectComm((PetscObject)F),mpardiso->schur_size,mpardiso->schur_size,mpardiso->schur,&St);CHKERRQ(ierr); 439 *S = St; 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatFactorInvertSchurComplement_MKL_PARDISO" 445 PetscErrorCode MatFactorInvertSchurComplement_MKL_PARDISO(Mat F) 446 { 447 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 if (!mpardiso->iparm[36-1]) { /* do nothing */ 452 PetscFunctionReturn(0); 453 } 454 if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 455 ierr = MatMKLPardisoInvertSchur_Private(mpardiso);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "MatFactorFactorizeSchurComplement_MKL_PARDISO" 461 PetscErrorCode MatFactorFactorizeSchurComplement_MKL_PARDISO(Mat F) 462 { 463 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 if (!mpardiso->iparm[36-1]) { /* do nothing */ 468 PetscFunctionReturn(0); 469 } 470 if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 471 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatFactorSolveSchurComplement_MKL_PARDISO" 477 PetscErrorCode MatFactorSolveSchurComplement_MKL_PARDISO(Mat F, Vec rhs, Vec sol) 478 { 479 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 480 PetscScalar *asol,*arhs; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 485 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 486 487 mpardiso->nrhs = 1; 488 ierr = VecGetArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 489 ierr = VecGetArray(sol,&asol);CHKERRQ(ierr); 490 ierr = MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 492 ierr = VecRestoreArray(sol,&asol);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatFactorSolveSchurComplementTranspose_MKL_PARDISO" 498 PetscErrorCode MatFactorSolveSchurComplementTranspose_MKL_PARDISO(Mat F, Vec rhs, Vec sol) 499 { 500 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 501 PetscScalar *asol,*arhs; 502 PetscInt oiparm12; 503 PetscErrorCode ierr; 504 505 PetscFunctionBegin; 506 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 507 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 508 509 mpardiso->nrhs = 1; 510 ierr = VecGetArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 511 ierr = VecGetArray(sol,&asol);CHKERRQ(ierr); 512 oiparm12 = mpardiso->iparm[12 - 1]; 513 mpardiso->iparm[12 - 1] = 2; 514 ierr = MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);CHKERRQ(ierr); 515 mpardiso->iparm[12 - 1] = oiparm12; 516 ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 517 ierr = VecRestoreArray(sol,&asol);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatFactorSetSchurComplementSolverType_MKL_PARDISO" 523 PetscErrorCode MatFactorSetSchurComplementSolverType_MKL_PARDISO(Mat F, PetscInt sym) 524 { 525 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data; 526 527 PetscFunctionBegin; 528 if (mpardiso->schur_factored && sym != mpardiso->schur_solver_type) { 529 SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Cannot change the Schur solver! Schur complement data has been already factored"); 530 } 531 mpardiso->schur_solver_type = sym; 532 PetscFunctionReturn(0); 533 } 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatDestroy_MKL_PARDISO" 537 PetscErrorCode MatDestroy_MKL_PARDISO(Mat A) 538 { 539 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data; 540 PetscErrorCode ierr; 541 542 PetscFunctionBegin; 543 if (mat_mkl_pardiso->CleanUp) { 544 mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 545 546 MKL_PARDISO (mat_mkl_pardiso->pt, 547 &mat_mkl_pardiso->maxfct, 548 &mat_mkl_pardiso->mnum, 549 &mat_mkl_pardiso->mtype, 550 &mat_mkl_pardiso->phase, 551 &mat_mkl_pardiso->n, 552 NULL, 553 NULL, 554 NULL, 555 NULL, 556 &mat_mkl_pardiso->nrhs, 557 mat_mkl_pardiso->iparm, 558 &mat_mkl_pardiso->msglvl, 559 NULL, 560 NULL, 561 &mat_mkl_pardiso->err); 562 } 563 ierr = PetscFree(mat_mkl_pardiso->perm);CHKERRQ(ierr); 564 ierr = PetscFree2(mat_mkl_pardiso->schur,mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 565 ierr = PetscFree(mat_mkl_pardiso->schur_idxs);CHKERRQ(ierr); 566 ierr = PetscFree(mat_mkl_pardiso->schur_pivots);CHKERRQ(ierr); 567 if (mat_mkl_pardiso->freeaij) { 568 ierr = PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);CHKERRQ(ierr); 569 ierr = PetscFree(mat_mkl_pardiso->a);CHKERRQ(ierr); 570 } 571 ierr = PetscFree(A->data);CHKERRQ(ierr); 572 573 /* clear composed functions */ 574 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSchurComplement_C",NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorInvertSchurComplement_C",NULL);CHKERRQ(ierr); 579 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorFactorizeSchurComplement_C",NULL);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplement_C",NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplementTranspose_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurComplementSolverType_C",NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_PardisoSetCntl_C",NULL);CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatMKLPardisoScatterSchur_Private" 589 static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce) 590 { 591 PetscFunctionBegin; 592 if (reduce) { /* data given for the whole matrix */ 593 PetscInt i,m=0,p=0; 594 for (i=0;i<mpardiso->nrhs;i++) { 595 PetscInt j; 596 for (j=0;j<mpardiso->schur_size;j++) { 597 schur[p+j] = whole[m+mpardiso->schur_idxs[j]]; 598 } 599 m += mpardiso->n; 600 p += mpardiso->schur_size; 601 } 602 } else { /* from Schur to whole */ 603 PetscInt i,m=0,p=0; 604 for (i=0;i<mpardiso->nrhs;i++) { 605 PetscInt j; 606 for (j=0;j<mpardiso->schur_size;j++) { 607 whole[m+mpardiso->schur_idxs[j]] = schur[p+j]; 608 } 609 m += mpardiso->n; 610 p += mpardiso->schur_size; 611 } 612 } 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatSolve_MKL_PARDISO" 618 PetscErrorCode MatSolve_MKL_PARDISO(Mat A,Vec b,Vec x) 619 { 620 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->data; 621 PetscErrorCode ierr; 622 PetscScalar *xarray; 623 const PetscScalar *barray; 624 625 PetscFunctionBegin; 626 mat_mkl_pardiso->nrhs = 1; 627 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 628 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 629 630 if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 631 else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 632 633 if (barray == xarray) { /* if the two vectors share the same memory */ 634 PetscScalar *work; 635 if (!mat_mkl_pardiso->schur_work) { 636 ierr = PetscMalloc1(mat_mkl_pardiso->n,&work);CHKERRQ(ierr); 637 } else { 638 work = mat_mkl_pardiso->schur_work; 639 } 640 mat_mkl_pardiso->iparm[6-1] = 1; 641 MKL_PARDISO (mat_mkl_pardiso->pt, 642 &mat_mkl_pardiso->maxfct, 643 &mat_mkl_pardiso->mnum, 644 &mat_mkl_pardiso->mtype, 645 &mat_mkl_pardiso->phase, 646 &mat_mkl_pardiso->n, 647 mat_mkl_pardiso->a, 648 mat_mkl_pardiso->ia, 649 mat_mkl_pardiso->ja, 650 NULL, 651 &mat_mkl_pardiso->nrhs, 652 mat_mkl_pardiso->iparm, 653 &mat_mkl_pardiso->msglvl, 654 (void*)xarray, 655 (void*)work, 656 &mat_mkl_pardiso->err); 657 if (!mat_mkl_pardiso->schur_work) { 658 ierr = PetscFree(work);CHKERRQ(ierr); 659 } 660 } else { 661 mat_mkl_pardiso->iparm[6-1] = 0; 662 MKL_PARDISO (mat_mkl_pardiso->pt, 663 &mat_mkl_pardiso->maxfct, 664 &mat_mkl_pardiso->mnum, 665 &mat_mkl_pardiso->mtype, 666 &mat_mkl_pardiso->phase, 667 &mat_mkl_pardiso->n, 668 mat_mkl_pardiso->a, 669 mat_mkl_pardiso->ia, 670 mat_mkl_pardiso->ja, 671 mat_mkl_pardiso->perm, 672 &mat_mkl_pardiso->nrhs, 673 mat_mkl_pardiso->iparm, 674 &mat_mkl_pardiso->msglvl, 675 (void*)barray, 676 (void*)xarray, 677 &mat_mkl_pardiso->err); 678 } 679 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 680 681 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 682 683 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 684 PetscInt shift = mat_mkl_pardiso->schur_size; 685 686 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 687 if (!mat_mkl_pardiso->schur_inverted) { 688 shift = 0; 689 } 690 691 if (!mat_mkl_pardiso->solve_interior) { 692 /* solve Schur complement */ 693 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);CHKERRQ(ierr); 694 ierr = MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);CHKERRQ(ierr); 695 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);CHKERRQ(ierr); 696 } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substitued to xarray[schur] will be neglected */ 697 PetscInt i; 698 for (i=0;i<mat_mkl_pardiso->schur_size;i++) { 699 xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.; 700 } 701 } 702 703 /* expansion phase */ 704 mat_mkl_pardiso->iparm[6-1] = 1; 705 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 706 MKL_PARDISO (mat_mkl_pardiso->pt, 707 &mat_mkl_pardiso->maxfct, 708 &mat_mkl_pardiso->mnum, 709 &mat_mkl_pardiso->mtype, 710 &mat_mkl_pardiso->phase, 711 &mat_mkl_pardiso->n, 712 mat_mkl_pardiso->a, 713 mat_mkl_pardiso->ia, 714 mat_mkl_pardiso->ja, 715 mat_mkl_pardiso->perm, 716 &mat_mkl_pardiso->nrhs, 717 mat_mkl_pardiso->iparm, 718 &mat_mkl_pardiso->msglvl, 719 (void*)xarray, 720 (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 721 &mat_mkl_pardiso->err); 722 723 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 724 mat_mkl_pardiso->iparm[6-1] = 0; 725 } 726 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 727 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatSolveTranspose_MKL_PARDISO" 733 PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A,Vec b,Vec x) 734 { 735 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data; 736 PetscInt oiparm12; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 oiparm12 = mat_mkl_pardiso->iparm[12 - 1]; 741 mat_mkl_pardiso->iparm[12 - 1] = 2; 742 ierr = MatSolve_MKL_PARDISO(A,b,x);CHKERRQ(ierr); 743 mat_mkl_pardiso->iparm[12 - 1] = oiparm12; 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatMatSolve_MKL_PARDISO" 749 PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A,Mat B,Mat X) 750 { 751 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->data; 752 PetscErrorCode ierr; 753 PetscScalar *barray, *xarray; 754 PetscBool flg; 755 756 PetscFunctionBegin; 757 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 758 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 759 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 760 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 761 762 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_pardiso->nrhs);CHKERRQ(ierr); 763 764 if (mat_mkl_pardiso->nrhs > 0) { 765 ierr = MatDenseGetArray(B,&barray); 766 ierr = MatDenseGetArray(X,&xarray); 767 768 if (barray == xarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"B and X cannot share the same memory location"); 769 if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 770 else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 771 mat_mkl_pardiso->iparm[6-1] = 0; 772 773 MKL_PARDISO (mat_mkl_pardiso->pt, 774 &mat_mkl_pardiso->maxfct, 775 &mat_mkl_pardiso->mnum, 776 &mat_mkl_pardiso->mtype, 777 &mat_mkl_pardiso->phase, 778 &mat_mkl_pardiso->n, 779 mat_mkl_pardiso->a, 780 mat_mkl_pardiso->ia, 781 mat_mkl_pardiso->ja, 782 mat_mkl_pardiso->perm, 783 &mat_mkl_pardiso->nrhs, 784 mat_mkl_pardiso->iparm, 785 &mat_mkl_pardiso->msglvl, 786 (void*)barray, 787 (void*)xarray, 788 &mat_mkl_pardiso->err); 789 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 790 791 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 792 PetscScalar *o_schur_work = NULL; 793 PetscInt shift = mat_mkl_pardiso->schur_size*mat_mkl_pardiso->nrhs,scale; 794 PetscInt mem = mat_mkl_pardiso->n*mat_mkl_pardiso->nrhs; 795 796 /* allocate extra memory if it is needed */ 797 scale = 1; 798 if (mat_mkl_pardiso->schur_inverted) { 799 scale = 2; 800 } 801 mem *= scale; 802 if (mem > mat_mkl_pardiso->schur_work_size) { 803 o_schur_work = mat_mkl_pardiso->schur_work; 804 ierr = PetscMalloc1(mem,&mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 805 } 806 807 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 808 if (!mat_mkl_pardiso->schur_inverted) shift = 0; 809 810 /* solve Schur complement */ 811 if (!mat_mkl_pardiso->solve_interior) { 812 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);CHKERRQ(ierr); 813 ierr = MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);CHKERRQ(ierr); 814 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);CHKERRQ(ierr); 815 } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substitued to xarray[schur,n] will be neglected */ 816 PetscInt i,n,m=0; 817 for (n=0;n<mat_mkl_pardiso->nrhs;n++) { 818 for (i=0;i<mat_mkl_pardiso->schur_size;i++) { 819 xarray[mat_mkl_pardiso->schur_idxs[i]+m] = 0.; 820 } 821 m += mat_mkl_pardiso->n; 822 } 823 } 824 825 /* expansion phase */ 826 mat_mkl_pardiso->iparm[6-1] = 1; 827 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 828 MKL_PARDISO (mat_mkl_pardiso->pt, 829 &mat_mkl_pardiso->maxfct, 830 &mat_mkl_pardiso->mnum, 831 &mat_mkl_pardiso->mtype, 832 &mat_mkl_pardiso->phase, 833 &mat_mkl_pardiso->n, 834 mat_mkl_pardiso->a, 835 mat_mkl_pardiso->ia, 836 mat_mkl_pardiso->ja, 837 mat_mkl_pardiso->perm, 838 &mat_mkl_pardiso->nrhs, 839 mat_mkl_pardiso->iparm, 840 &mat_mkl_pardiso->msglvl, 841 (void*)xarray, 842 (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 843 &mat_mkl_pardiso->err); 844 if (o_schur_work) { /* restore original schur_work (minimal size) */ 845 ierr = PetscFree(mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 846 mat_mkl_pardiso->schur_work = o_schur_work; 847 } 848 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 849 mat_mkl_pardiso->iparm[6-1] = 0; 850 } 851 } 852 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 853 PetscFunctionReturn(0); 854 } 855 856 #undef __FUNCT__ 857 #define __FUNCT__ "MatFactorNumeric_MKL_PARDISO" 858 PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F,Mat A,const MatFactorInfo *info) 859 { 860 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(F)->data; 861 PetscErrorCode ierr; 862 863 PetscFunctionBegin; 864 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 865 ierr = (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_REUSE_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);CHKERRQ(ierr); 866 867 mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION; 868 MKL_PARDISO (mat_mkl_pardiso->pt, 869 &mat_mkl_pardiso->maxfct, 870 &mat_mkl_pardiso->mnum, 871 &mat_mkl_pardiso->mtype, 872 &mat_mkl_pardiso->phase, 873 &mat_mkl_pardiso->n, 874 mat_mkl_pardiso->a, 875 mat_mkl_pardiso->ia, 876 mat_mkl_pardiso->ja, 877 mat_mkl_pardiso->perm, 878 &mat_mkl_pardiso->nrhs, 879 mat_mkl_pardiso->iparm, 880 &mat_mkl_pardiso->msglvl, 881 NULL, 882 (void*)mat_mkl_pardiso->schur, 883 &mat_mkl_pardiso->err); 884 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 885 886 if (mat_mkl_pardiso->schur) { /* schur output from pardiso is in row major format */ 887 PetscInt j,k,n=mat_mkl_pardiso->schur_size; 888 if (!mat_mkl_pardiso->schur_solver_type) { 889 for (j=0; j<n; j++) { 890 for (k=0; k<j; k++) { 891 PetscScalar tmp = mat_mkl_pardiso->schur[j + k*n]; 892 mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n]; 893 mat_mkl_pardiso->schur[k + j*n] = tmp; 894 } 895 } 896 } else { /* we could use row-major in LAPACK routines (e.g. use 'U' instead of 'L'; instead, I prefer consistency between data structures and swap to column major */ 897 for (j=0; j<n; j++) { 898 for (k=0; k<j; k++) { 899 mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n]; 900 } 901 } 902 } 903 } 904 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 905 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 906 mat_mkl_pardiso->schur_factored = PETSC_FALSE; 907 mat_mkl_pardiso->schur_inverted = PETSC_FALSE; 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "PetscSetMKL_PARDISOFromOptions" 913 PetscErrorCode PetscSetMKL_PARDISOFromOptions(Mat F, Mat A) 914 { 915 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->data; 916 PetscErrorCode ierr; 917 PetscInt icntl,threads=1; 918 PetscBool flg; 919 920 PetscFunctionBegin; 921 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_PARDISO Options","Mat");CHKERRQ(ierr); 922 923 ierr = PetscOptionsInt("-mat_mkl_pardiso_65","Number of threads to use within PARDISO","None",threads,&threads,&flg);CHKERRQ(ierr); 924 if (flg) PetscSetMKL_PARDISOThreads((int)threads); 925 926 ierr = PetscOptionsInt("-mat_mkl_pardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_pardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 927 if (flg) mat_mkl_pardiso->maxfct = icntl; 928 929 ierr = PetscOptionsInt("-mat_mkl_pardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_pardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 930 if (flg) mat_mkl_pardiso->mnum = icntl; 931 932 ierr = PetscOptionsInt("-mat_mkl_pardiso_68","Message level information","None",mat_mkl_pardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 933 if (flg) mat_mkl_pardiso->msglvl = icntl; 934 935 ierr = PetscOptionsInt("-mat_mkl_pardiso_69","Defines the matrix type","None",mat_mkl_pardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 936 if(flg){ 937 void *pt[IPARM_SIZE]; 938 mat_mkl_pardiso->mtype = icntl; 939 MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 940 #if defined(PETSC_USE_REAL_SINGLE) 941 mat_mkl_pardiso->iparm[27] = 1; 942 #else 943 mat_mkl_pardiso->iparm[27] = 0; 944 #endif 945 mat_mkl_pardiso->iparm[34] = 1; /* use 0-based indexing */ 946 } 947 ierr = PetscOptionsInt("-mat_mkl_pardiso_1","Use default values","None",mat_mkl_pardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 948 949 if (flg && icntl != 0) { 950 ierr = PetscOptionsInt("-mat_mkl_pardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_pardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 951 if (flg) mat_mkl_pardiso->iparm[1] = icntl; 952 953 ierr = PetscOptionsInt("-mat_mkl_pardiso_4","Preconditioned CGS/CG","None",mat_mkl_pardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 954 if (flg) mat_mkl_pardiso->iparm[3] = icntl; 955 956 ierr = PetscOptionsInt("-mat_mkl_pardiso_5","User permutation","None",mat_mkl_pardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 957 if (flg) mat_mkl_pardiso->iparm[4] = icntl; 958 959 ierr = PetscOptionsInt("-mat_mkl_pardiso_6","Write solution on x","None",mat_mkl_pardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 960 if (flg) mat_mkl_pardiso->iparm[5] = icntl; 961 962 ierr = PetscOptionsInt("-mat_mkl_pardiso_8","Iterative refinement step","None",mat_mkl_pardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 963 if (flg) mat_mkl_pardiso->iparm[7] = icntl; 964 965 ierr = PetscOptionsInt("-mat_mkl_pardiso_10","Pivoting perturbation","None",mat_mkl_pardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 966 if (flg) mat_mkl_pardiso->iparm[9] = icntl; 967 968 ierr = PetscOptionsInt("-mat_mkl_pardiso_11","Scaling vectors","None",mat_mkl_pardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 969 if (flg) mat_mkl_pardiso->iparm[10] = icntl; 970 971 ierr = PetscOptionsInt("-mat_mkl_pardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_pardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 972 if (flg) mat_mkl_pardiso->iparm[11] = icntl; 973 974 ierr = PetscOptionsInt("-mat_mkl_pardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_pardiso->iparm[12],&icntl,&flg);CHKERRQ(ierr); 975 if (flg) mat_mkl_pardiso->iparm[12] = icntl; 976 977 ierr = PetscOptionsInt("-mat_mkl_pardiso_18","Numbers of non-zero elements","None",mat_mkl_pardiso->iparm[17],&icntl,&flg);CHKERRQ(ierr); 978 if (flg) mat_mkl_pardiso->iparm[17] = icntl; 979 980 ierr = PetscOptionsInt("-mat_mkl_pardiso_19","Report number of floating point operations","None",mat_mkl_pardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 981 if (flg) mat_mkl_pardiso->iparm[18] = icntl; 982 983 ierr = PetscOptionsInt("-mat_mkl_pardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_pardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 984 if (flg) mat_mkl_pardiso->iparm[20] = icntl; 985 986 ierr = PetscOptionsInt("-mat_mkl_pardiso_24","Parallel factorization control","None",mat_mkl_pardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 987 if (flg) mat_mkl_pardiso->iparm[23] = icntl; 988 989 ierr = PetscOptionsInt("-mat_mkl_pardiso_25","Parallel forward/backward solve control","None",mat_mkl_pardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 990 if (flg) mat_mkl_pardiso->iparm[24] = icntl; 991 992 ierr = PetscOptionsInt("-mat_mkl_pardiso_27","Matrix checker","None",mat_mkl_pardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 993 if (flg) mat_mkl_pardiso->iparm[26] = icntl; 994 995 ierr = PetscOptionsInt("-mat_mkl_pardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_pardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 996 if (flg) mat_mkl_pardiso->iparm[30] = icntl; 997 998 ierr = PetscOptionsInt("-mat_mkl_pardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_pardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 999 if (flg) mat_mkl_pardiso->iparm[33] = icntl; 1000 1001 ierr = PetscOptionsInt("-mat_mkl_pardiso_60","Intel MKL_PARDISO mode","None",mat_mkl_pardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 1002 if (flg) mat_mkl_pardiso->iparm[59] = icntl; 1003 } 1004 PetscOptionsEnd(); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatFactorMKL_PARDISOInitialize_Private" 1010 PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso) 1011 { 1012 PetscErrorCode ierr; 1013 PetscInt i; 1014 1015 PetscFunctionBegin; 1016 for ( i = 0; i < IPARM_SIZE; i++ ){ 1017 mat_mkl_pardiso->iparm[i] = 0; 1018 } 1019 for ( i = 0; i < IPARM_SIZE; i++ ){ 1020 mat_mkl_pardiso->pt[i] = 0; 1021 } 1022 /* Default options for both sym and unsym */ 1023 mat_mkl_pardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 1024 mat_mkl_pardiso->iparm[ 1] = 2; /* Metis reordering */ 1025 mat_mkl_pardiso->iparm[ 5] = 0; /* Write solution into x */ 1026 mat_mkl_pardiso->iparm[ 7] = 0; /* Max number of iterative refinement steps */ 1027 mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 1028 mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 1029 #if 0 1030 mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control*/ 1031 #endif 1032 mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */ 1033 mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on master */ 1034 1035 mat_mkl_pardiso->CleanUp = PETSC_FALSE; 1036 mat_mkl_pardiso->maxfct = 1; /* Maximum number of numerical factorizations. */ 1037 mat_mkl_pardiso->mnum = 1; /* Which factorization to use. */ 1038 mat_mkl_pardiso->msglvl = 0; /* 0: do not print 1: Print statistical information in file */ 1039 mat_mkl_pardiso->phase = -1; 1040 mat_mkl_pardiso->err = 0; 1041 1042 mat_mkl_pardiso->n = A->rmap->N; 1043 mat_mkl_pardiso->nrhs = 1; 1044 mat_mkl_pardiso->err = 0; 1045 mat_mkl_pardiso->phase = -1; 1046 1047 if(ftype == MAT_FACTOR_LU){ 1048 mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 1049 mat_mkl_pardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 1050 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 1051 1052 } else { 1053 mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 1054 mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */ 1055 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 1056 /* mat_mkl_pardiso->iparm[20] = 1; */ /* Apply 1x1 and 2x2 Bunch-Kaufman pivoting during the factorization process */ 1057 #if defined(PETSC_USE_DEBUG) 1058 mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */ 1059 #endif 1060 } 1061 ierr = PetscMalloc1(A->rmap->N*sizeof(INT_TYPE), &mat_mkl_pardiso->perm);CHKERRQ(ierr); 1062 for(i = 0; i < A->rmap->N; i++){ 1063 mat_mkl_pardiso->perm[i] = 0; 1064 } 1065 mat_mkl_pardiso->schur_size = 0; 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatFactorSymbolic_AIJMKL_PARDISO_Private" 1071 PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F,Mat A,const MatFactorInfo *info) 1072 { 1073 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 1078 ierr = PetscSetMKL_PARDISOFromOptions(F,A);CHKERRQ(ierr); 1079 1080 /* throw away any previously computed structure */ 1081 if (mat_mkl_pardiso->freeaij) { 1082 ierr = PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);CHKERRQ(ierr); 1083 ierr = PetscFree(mat_mkl_pardiso->a);CHKERRQ(ierr); 1084 } 1085 ierr = (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_INITIAL_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);CHKERRQ(ierr); 1086 mat_mkl_pardiso->n = A->rmap->N; 1087 1088 mat_mkl_pardiso->phase = JOB_ANALYSIS; 1089 1090 MKL_PARDISO (mat_mkl_pardiso->pt, 1091 &mat_mkl_pardiso->maxfct, 1092 &mat_mkl_pardiso->mnum, 1093 &mat_mkl_pardiso->mtype, 1094 &mat_mkl_pardiso->phase, 1095 &mat_mkl_pardiso->n, 1096 mat_mkl_pardiso->a, 1097 mat_mkl_pardiso->ia, 1098 mat_mkl_pardiso->ja, 1099 mat_mkl_pardiso->perm, 1100 &mat_mkl_pardiso->nrhs, 1101 mat_mkl_pardiso->iparm, 1102 &mat_mkl_pardiso->msglvl, 1103 NULL, 1104 NULL, 1105 &mat_mkl_pardiso->err); 1106 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d\n. Please check manual",mat_mkl_pardiso->err); 1107 1108 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 1109 1110 if (F->factortype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO; 1111 else F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO; 1112 1113 F->ops->solve = MatSolve_MKL_PARDISO; 1114 F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO; 1115 F->ops->matsolve = MatMatSolve_MKL_PARDISO; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_PARDISO" 1121 PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1122 { 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #if !defined(PETSC_USE_COMPLEX) 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatGetInertia_MKL_PARDISO" 1133 PetscErrorCode MatGetInertia_MKL_PARDISO(Mat F,int *nneg,int *nzero,int *npos) 1134 { 1135 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)F->data; 1136 1137 PetscFunctionBegin; 1138 if (nneg) *nneg = mat_mkl_pardiso->iparm[22]; 1139 if (npos) *npos = mat_mkl_pardiso->iparm[21]; 1140 if (nzero) *nzero = F->rmap->N -(mat_mkl_pardiso->iparm[22] + mat_mkl_pardiso->iparm[21]); 1141 PetscFunctionReturn(0); 1142 } 1143 #endif 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatCholeskyFactorSymbolic_AIJMKL_PARDISO" 1147 PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,const MatFactorInfo *info) 1148 { 1149 PetscErrorCode ierr; 1150 1151 PetscFunctionBegin; 1152 ierr = MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);CHKERRQ(ierr); 1153 #if defined(PETSC_USE_COMPLEX) 1154 F->ops->getinertia = NULL; 1155 #else 1156 F->ops->getinertia = MatGetInertia_MKL_PARDISO; 1157 #endif 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatView_MKL_PARDISO" 1163 PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer) 1164 { 1165 PetscErrorCode ierr; 1166 PetscBool iascii; 1167 PetscViewerFormat format; 1168 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data; 1169 PetscInt i; 1170 1171 PetscFunctionBegin; 1172 if (A->ops->solve != MatSolve_MKL_PARDISO) PetscFunctionReturn(0); 1173 1174 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1175 if (iascii) { 1176 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1177 if (format == PETSC_VIEWER_ASCII_INFO) { 1178 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO run parameters:\n");CHKERRQ(ierr); 1179 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO phase: %d \n",mat_mkl_pardiso->phase);CHKERRQ(ierr); 1180 for(i = 1; i <= 64; i++){ 1181 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO iparm[%d]: %d \n",i, mat_mkl_pardiso->iparm[i - 1]);CHKERRQ(ierr); 1182 } 1183 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO maxfct: %d \n", mat_mkl_pardiso->maxfct);CHKERRQ(ierr); 1184 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mnum: %d \n", mat_mkl_pardiso->mnum);CHKERRQ(ierr); 1185 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mtype: %d \n", mat_mkl_pardiso->mtype);CHKERRQ(ierr); 1186 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO n: %d \n", mat_mkl_pardiso->n);CHKERRQ(ierr); 1187 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO nrhs: %d \n", mat_mkl_pardiso->nrhs);CHKERRQ(ierr); 1188 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO msglvl: %d \n", mat_mkl_pardiso->msglvl);CHKERRQ(ierr); 1189 } 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatGetInfo_MKL_PARDISO" 1197 PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info) 1198 { 1199 Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)A->data; 1200 1201 PetscFunctionBegin; 1202 info->block_size = 1.0; 1203 info->nz_used = mat_mkl_pardiso->nz; 1204 info->nz_allocated = mat_mkl_pardiso->nz; 1205 info->nz_unneeded = 0.0; 1206 info->assemblies = 0.0; 1207 info->mallocs = 0.0; 1208 info->memory = 0.0; 1209 info->fill_ratio_given = 0; 1210 info->fill_ratio_needed = 0; 1211 info->factor_mallocs = 0; 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatMkl_PardisoSetCntl_MKL_PARDISO" 1217 PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F,PetscInt icntl,PetscInt ival) 1218 { 1219 Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)F->data; 1220 1221 PetscFunctionBegin; 1222 if(icntl <= 64){ 1223 mat_mkl_pardiso->iparm[icntl - 1] = ival; 1224 } else { 1225 if(icntl == 65) PetscSetMKL_PARDISOThreads(ival); 1226 else if(icntl == 66) mat_mkl_pardiso->maxfct = ival; 1227 else if(icntl == 67) mat_mkl_pardiso->mnum = ival; 1228 else if(icntl == 68) mat_mkl_pardiso->msglvl = ival; 1229 else if(icntl == 69){ 1230 void *pt[IPARM_SIZE]; 1231 mat_mkl_pardiso->mtype = ival; 1232 MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 1233 #if defined(PETSC_USE_REAL_SINGLE) 1234 mat_mkl_pardiso->iparm[27] = 1; 1235 #else 1236 mat_mkl_pardiso->iparm[27] = 0; 1237 #endif 1238 mat_mkl_pardiso->iparm[34] = 1; 1239 } else if(icntl==70) mat_mkl_pardiso->solve_interior = (PetscBool)!!ival; 1240 } 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatMkl_PardisoSetCntl" 1246 /*@ 1247 MatMkl_PardisoSetCntl - Set Mkl_Pardiso parameters 1248 1249 Logically Collective on Mat 1250 1251 Input Parameters: 1252 + F - the factored matrix obtained by calling MatGetFactor() 1253 . icntl - index of Mkl_Pardiso parameter 1254 - ival - value of Mkl_Pardiso parameter 1255 1256 Options Database: 1257 . -mat_mkl_pardiso_<icntl> <ival> 1258 1259 Level: beginner 1260 1261 References: 1262 . Mkl_Pardiso Users' Guide 1263 1264 .seealso: MatGetFactor() 1265 @*/ 1266 PetscErrorCode MatMkl_PardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 1267 { 1268 PetscErrorCode ierr; 1269 1270 PetscFunctionBegin; 1271 ierr = PetscTryMethod(F,"MatMkl_PardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 /*MC 1276 MATSOLVERMKL_PARDISO - A matrix type providing direct solvers (LU) for 1277 sequential matrices via the external package MKL_PARDISO. 1278 1279 Works with MATSEQAIJ matrices 1280 1281 Use -pc_type lu -pc_factor_mat_solver_package mkl_pardiso to us this direct solver 1282 1283 Options Database Keys: 1284 + -mat_mkl_pardiso_65 - Number of threads to use within MKL_PARDISO 1285 . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time 1286 . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase 1287 . -mat_mkl_pardiso_68 - Message level information 1288 . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type 1289 . -mat_mkl_pardiso_1 - Use default values 1290 . -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix 1291 . -mat_mkl_pardiso_4 - Preconditioned CGS/CG 1292 . -mat_mkl_pardiso_5 - User permutation 1293 . -mat_mkl_pardiso_6 - Write solution on x 1294 . -mat_mkl_pardiso_8 - Iterative refinement step 1295 . -mat_mkl_pardiso_10 - Pivoting perturbation 1296 . -mat_mkl_pardiso_11 - Scaling vectors 1297 . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A 1298 . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching 1299 . -mat_mkl_pardiso_18 - Numbers of non-zero elements 1300 . -mat_mkl_pardiso_19 - Report number of floating point operations 1301 . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices 1302 . -mat_mkl_pardiso_24 - Parallel factorization control 1303 . -mat_mkl_pardiso_25 - Parallel forward/backward solve control 1304 . -mat_mkl_pardiso_27 - Matrix checker 1305 . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors 1306 . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode 1307 - -mat_mkl_pardiso_60 - Intel MKL_PARDISO mode 1308 1309 Level: beginner 1310 1311 For more information please check mkl_pardiso manual 1312 1313 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage 1314 1315 M*/ 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_pardiso" 1318 static PetscErrorCode MatFactorGetSolverPackage_mkl_pardiso(Mat A, const MatSolverPackage *type) 1319 { 1320 PetscFunctionBegin; 1321 *type = MATSOLVERMKL_PARDISO; 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetFactor_aij_mkl_pardiso" 1327 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A,MatFactorType ftype,Mat *F) 1328 { 1329 Mat B; 1330 PetscErrorCode ierr; 1331 Mat_MKL_PARDISO *mat_mkl_pardiso; 1332 PetscBool isSeqAIJ,isSeqBAIJ,isSeqSBAIJ; 1333 1334 PetscFunctionBegin; 1335 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 1336 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 1337 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 1338 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1339 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1340 ierr = PetscStrallocpy("mkl_pardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 1341 ierr = MatSetUp(B);CHKERRQ(ierr); 1342 1343 ierr = PetscNewLog(B,&mat_mkl_pardiso);CHKERRQ(ierr); 1344 B->data = mat_mkl_pardiso; 1345 1346 ierr = MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso);CHKERRQ(ierr); 1347 if (ftype == MAT_FACTOR_LU) { 1348 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO; 1349 B->factortype = MAT_FACTOR_LU; 1350 mat_mkl_pardiso->needsym = PETSC_FALSE; 1351 if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1352 else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1353 else if (isSeqSBAIJ) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead"); 1354 else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU with %s format",((PetscObject)A)->type_name); 1355 mat_mkl_pardiso->schur_solver_type = 0; 1356 #if defined(PETSC_USE_COMPLEX) 1357 mat_mkl_pardiso->mtype = 13; 1358 #else 1359 if (A->structurally_symmetric) mat_mkl_pardiso->mtype = 1; 1360 else mat_mkl_pardiso->mtype = 11; 1361 #endif 1362 } else { 1363 #if defined(PETSC_USE_COMPLEX) 1364 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name); 1365 #endif 1366 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO; 1367 B->factortype = MAT_FACTOR_CHOLESKY; 1368 if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1369 else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1370 else if (isSeqSBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij; 1371 else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with %s format",((PetscObject)A)->type_name); 1372 1373 mat_mkl_pardiso->needsym = PETSC_TRUE; 1374 if (A->spd_set && A->spd) { 1375 mat_mkl_pardiso->schur_solver_type = 1; 1376 mat_mkl_pardiso->mtype = 2; 1377 } else { 1378 mat_mkl_pardiso->schur_solver_type = 2; 1379 mat_mkl_pardiso->mtype = -2; 1380 } 1381 } 1382 B->ops->destroy = MatDestroy_MKL_PARDISO; 1383 B->ops->view = MatView_MKL_PARDISO; 1384 B->factortype = ftype; 1385 B->ops->getinfo = MatGetInfo_MKL_PARDISO; 1386 B->assembled = PETSC_TRUE; 1387 1388 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 1389 ierr = PetscStrallocpy(MATSOLVERMKL_PARDISO,&B->solvertype);CHKERRQ(ierr); 1390 1391 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_pardiso);CHKERRQ(ierr); 1392 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MKL_PARDISO);CHKERRQ(ierr); 1393 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1394 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSchurComplement_C",MatFactorGetSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1395 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorInvertSchurComplement_C",MatFactorInvertSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1396 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorFactorizeSchurComplement_C",MatFactorFactorizeSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1397 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplement_C",MatFactorSolveSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1398 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplementTranspose_C",MatFactorSolveSchurComplementTranspose_MKL_PARDISO);CHKERRQ(ierr); 1399 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurComplementSolverType_C",MatFactorSetSchurComplementSolverType_MKL_PARDISO);CHKERRQ(ierr); 1400 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_PardisoSetCntl_C",MatMkl_PardisoSetCntl_MKL_PARDISO);CHKERRQ(ierr); 1401 1402 *F = B; 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSolverPackageRegister_MKL_Pardiso" 1408 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_Pardiso(void) 1409 { 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1414 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1415 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419