1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/sbaij/seq/sbaij.h> 7 #include <../src/mat/impls/dense/seq/dense.h> 8 #include <petscblaslapack.h> 9 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <math.h> 13 #include <mkl.h> 14 15 /* 16 * Possible mkl_pardiso phases that controls the execution of the solver. 17 * For more information check mkl_pardiso manual. 18 */ 19 #define JOB_ANALYSIS 11 20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 21 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 22 #define JOB_NUMERICAL_FACTORIZATION 22 23 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 24 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 25 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 26 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 27 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 28 #define JOB_RELEASE_OF_LU_MEMORY 0 29 #define JOB_RELEASE_OF_ALL_MEMORY -1 30 31 #define IPARM_SIZE 64 32 33 #if defined(PETSC_USE_64BIT_INDICES) 34 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 35 /* sizeof(MKL_INT) == sizeof(long long int) if ilp64*/ 36 #define INT_TYPE long long int 37 #define MKL_PARDISO pardiso 38 #define MKL_PARDISO_INIT pardisoinit 39 #else 40 #define INT_TYPE long long int 41 #define MKL_PARDISO pardiso_64 42 #define MKL_PARDISO_INIT pardiso_64init 43 #endif 44 #else 45 #define INT_TYPE int 46 #define MKL_PARDISO pardiso 47 #define MKL_PARDISO_INIT pardisoinit 48 #endif 49 50 51 /* 52 * Internal data structure. 53 * For more information check mkl_pardiso manual. 54 */ 55 typedef struct { 56 57 /* Configuration vector*/ 58 INT_TYPE iparm[IPARM_SIZE]; 59 60 /* 61 * Internal mkl_pardiso memory location. 62 * After the first call to mkl_pardiso do not modify pt, as that could cause a serious memory leak. 63 */ 64 void *pt[IPARM_SIZE]; 65 66 /* Basic mkl_pardiso info*/ 67 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 68 69 /* Matrix structure*/ 70 void *a; 71 INT_TYPE *ia, *ja; 72 73 /* Number of non-zero elements*/ 74 INT_TYPE nz; 75 76 /* Row permutaton vector*/ 77 INT_TYPE *perm; 78 79 /* Define if matrix preserves sparse structure.*/ 80 MatStructure matstruc; 81 82 PetscBool needsym; 83 PetscBool freeaij; 84 85 /* Schur complement */ 86 PetscScalar *schur; 87 PetscInt schur_size; 88 PetscInt *schur_idxs; 89 PetscScalar *schur_work; 90 PetscBLASInt schur_work_size; 91 PetscInt schur_solver_type; 92 PetscInt *schur_pivots; 93 PetscBool schur_factored; 94 PetscBool schur_inverted; 95 96 /* True if mkl_pardiso function have been used.*/ 97 PetscBool CleanUp; 98 99 /* Conversion to a format suitable for MKL */ 100 PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool*, INT_TYPE*, INT_TYPE**, INT_TYPE**, void**); 101 PetscErrorCode (*Destroy)(Mat); 102 } Mat_MKL_PARDISO; 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatMKLPardiso_Convert_seqsbaij" 106 PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,void **v) 107 { 108 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 109 PetscInt bs = A->rmap->bs; 110 111 PetscFunctionBegin; 112 if (!sym) { 113 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"This should not happen"); 114 } 115 if (bs == 1) { /* already in the correct format */ 116 *v = aa->a; 117 *r = aa->i; 118 *c = aa->j; 119 *nnz = aa->nz; 120 *free = PETSC_FALSE; 121 } else { 122 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqSBAIJ to MKL Pardiso format still need to be implemented"); 123 } 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatMKLPardiso_Convert_seqbaij" 129 PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,void **v) 130 { 131 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 132 PetscFunctionBegin; 133 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqBAIJ to MKL Pardiso format still need to be implemented"); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatMKLPardiso_Convert_seqaij" 139 PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,void **v) 140 { 141 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 142 PetscErrorCode ierr; 143 144 PetscFunctionBegin; 145 if (!sym) { /* already in the correct format */ 146 *v = aa->a; 147 *r = aa->i; 148 *c = aa->j; 149 *nnz = aa->nz; 150 *free = PETSC_FALSE; 151 PetscFunctionReturn(0); 152 } 153 /* need to get the triangular part */ 154 if (reuse == MAT_INITIAL_MATRIX) { 155 PetscScalar *vals,*vv; 156 PetscInt *row,*col,*jj; 157 PetscInt m = A->rmap->n,nz,i; 158 159 nz = 0; 160 for (i=0; i<m; i++) { 161 nz += aa->i[i+1] - aa->diag[i]; 162 } 163 ierr = PetscMalloc2(m+1,&row,nz,&col);CHKERRQ(ierr); 164 ierr = PetscMalloc1(nz,&vals);CHKERRQ(ierr); 165 jj = col; 166 vv = vals; 167 168 row[0] = 0; 169 for (i=0; i<m; i++) { 170 PetscInt *aj = aa->j + aa->diag[i]; 171 PetscScalar *av = aa->a + aa->diag[i]; 172 PetscInt rl = aa->i[i+1] - aa->diag[i],j; 173 for (j=0; j<rl; j++) { 174 *jj = *aj; jj++; aj++; 175 *vv = *av; vv++; av++; 176 } 177 row[i+1] = row[i] + rl; 178 } 179 *v = vals; 180 *r = row; 181 *c = col; 182 *nnz = nz; 183 *free = PETSC_TRUE; 184 } else { 185 PetscScalar *vv; 186 PetscInt m = A->rmap->n,i; 187 188 vv = *v; 189 for (i=0; i<m; i++) { 190 PetscInt *aj = aa->j + aa->diag[i]; 191 PetscScalar *av = aa->a + aa->diag[i]; 192 PetscInt rl = aa->i[i+1] - aa->diag[i],j; 193 for (j=0; j<rl; j++) { 194 *vv = *av; vv++; av++; 195 } 196 } 197 *free = PETSC_TRUE; 198 } 199 PetscFunctionReturn(0); 200 } 201 202 void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm []) 203 { 204 int iparm_copy[IPARM_SIZE], mtype_copy, i; 205 206 mtype_copy = *mtype; 207 pardisoinit(pt, &mtype_copy, iparm_copy); 208 for(i = 0; i < IPARM_SIZE; i++){ 209 iparm[i] = iparm_copy[i]; 210 } 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMKLPardisoFactorSchur_Private" 215 static PetscErrorCode MatMKLPardisoFactorSchur_Private(Mat_MKL_PARDISO* mpardiso) 216 { 217 PetscBLASInt B_N,B_ierr; 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 if (mpardiso->schur_factored) { 222 PetscFunctionReturn(0); 223 } 224 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 225 switch (mpardiso->schur_solver_type) { 226 case 1: /* symmetric */ 227 if (!mpardiso->schur_pivots) { 228 ierr = PetscMalloc1(B_N,&mpardiso->schur_pivots);CHKERRQ(ierr); 229 } 230 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 231 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&mpardiso->schur_work_size,&B_ierr)); 232 ierr = PetscFPTrapPop();CHKERRQ(ierr); 233 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 234 break; 235 case 2: /* hermitian solver */ 236 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 237 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,mpardiso->schur,&B_N,&B_ierr)); 238 ierr = PetscFPTrapPop();CHKERRQ(ierr); 239 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 240 break; 241 default: /* general */ 242 if (!mpardiso->schur_pivots) { 243 ierr = PetscMalloc1(B_N,&mpardiso->schur_pivots);CHKERRQ(ierr); 244 } 245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 246 PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&B_ierr)); 247 ierr = PetscFPTrapPop();CHKERRQ(ierr); 248 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 249 break; 250 } 251 mpardiso->schur_factored = PETSC_TRUE; 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatMKLPardisoInvertSchur_Private" 257 static PetscErrorCode MatMKLPardisoInvertSchur_Private(Mat_MKL_PARDISO* mpardiso) 258 { 259 PetscBLASInt B_N,B_ierr; 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 264 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 265 switch (mpardiso->schur_solver_type) { 266 case 1: /* symmetric */ 267 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 268 PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&B_ierr)); 269 ierr = PetscFPTrapPop();CHKERRQ(ierr); 270 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 271 break; 272 case 2: /* hermitian solver */ 273 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 274 PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,mpardiso->schur,&B_N,&B_ierr)); 275 ierr = PetscFPTrapPop();CHKERRQ(ierr); 276 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 277 break; 278 default: /* general */ 279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 280 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&mpardiso->schur_work_size,&B_ierr)); 281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 282 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 283 break; 284 } 285 mpardiso->schur_inverted = PETSC_TRUE; 286 PetscFunctionReturn(0); 287 } 288 289 #undef __FUNCT__ 290 #define __FUNCT__ "MatMKLPardisoSolveSchur_Private" 291 static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat_MKL_PARDISO* mpardiso, PetscScalar *B, PetscScalar *X) 292 { 293 PetscScalar one=1.,zero=0.,*schur_rhs,*schur_sol; 294 PetscBLASInt B_N,B_Nrhs,B_ierr; 295 char type[2]; 296 PetscErrorCode ierr; 297 298 PetscFunctionBegin; 299 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 300 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 301 ierr = PetscBLASIntCast(mpardiso->nrhs,&B_Nrhs);CHKERRQ(ierr); 302 if (X == B && mpardiso->schur_inverted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"X and B cannot point to the same address"); 303 if (X != B) { /* using LAPACK *TRS subroutines */ 304 ierr = PetscMemcpy(X,B,B_N*B_Nrhs*sizeof(PetscScalar));CHKERRQ(ierr); 305 } 306 schur_rhs = B; 307 schur_sol = X; 308 switch (mpardiso->schur_solver_type) { 309 case 1: /* symmetric solver */ 310 if (mpardiso->schur_inverted) { 311 PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 312 } else { 313 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 314 PetscStackCallBLAS("LAPACKsytrs",LAPACKsytrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr)); 315 ierr = PetscFPTrapPop();CHKERRQ(ierr); 316 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRS Lapack routine %d",(int)B_ierr); 317 } 318 break; 319 case 2: /* hermitian solver */ 320 if (mpardiso->schur_inverted) { /* BLAShemm should go here */ 321 PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 322 } else { 323 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 324 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,schur_sol,&B_N,&B_ierr)); 325 ierr = PetscFPTrapPop();CHKERRQ(ierr); 326 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRS Lapack routine %d",(int)B_ierr); 327 } 328 break; 329 default: /* general */ 330 switch (mpardiso->iparm[12-1]) { 331 case 1: 332 sprintf(type,"C"); 333 break; 334 case 2: 335 sprintf(type,"T"); 336 break; 337 default: 338 sprintf(type,"N"); 339 break; 340 } 341 if (mpardiso->schur_inverted) { 342 PetscStackCallBLAS("BLASgemm",BLASgemm_(type,"N",&B_N,&B_Nrhs,&B_N,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 343 } else { 344 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 345 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_(type,&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr)); 346 ierr = PetscFPTrapPop();CHKERRQ(ierr); 347 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRS Lapack routine %d",(int)B_ierr); 348 } 349 break; 350 } 351 PetscFunctionReturn(0); 352 } 353 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatFactorSetSchurIS_MKL_PARDISO" 357 PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is) 358 { 359 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 360 const PetscInt *idxs; 361 PetscInt size,i; 362 PetscMPIInt csize; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&csize);CHKERRQ(ierr); 367 if (csize > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MKL_PARDISO parallel Schur complements not yet supported from PETSc\n"); 368 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 369 if (mpardiso->schur_size != size) { 370 mpardiso->schur_size = size; 371 ierr = PetscFree2(mpardiso->schur,mpardiso->schur_work);CHKERRQ(ierr); 372 ierr = PetscFree(mpardiso->schur_idxs);CHKERRQ(ierr); 373 ierr = PetscFree(mpardiso->schur_pivots);CHKERRQ(ierr); 374 ierr = PetscBLASIntCast(PetscMax(mpardiso->n,2*size),&mpardiso->schur_work_size);CHKERRQ(ierr); 375 ierr = PetscMalloc2(size*size,&mpardiso->schur,mpardiso->schur_work_size,&mpardiso->schur_work);CHKERRQ(ierr); 376 ierr = PetscMalloc1(size,&mpardiso->schur_idxs);CHKERRQ(ierr); 377 } 378 ierr = PetscMemzero(mpardiso->perm,mpardiso->n*sizeof(INT_TYPE));CHKERRQ(ierr); 379 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 380 ierr = PetscMemcpy(mpardiso->schur_idxs,idxs,size*sizeof(PetscInt));CHKERRQ(ierr); 381 for (i=0;i<size;i++) mpardiso->perm[idxs[i]] = 1; 382 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 383 if (size) { /* turn on Schur switch if we the set of indices is not empty */ 384 mpardiso->iparm[36-1] = 2; 385 } 386 mpardiso->schur_factored = PETSC_FALSE; 387 mpardiso->schur_inverted = PETSC_FALSE; 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatFactorCreateSchurComplement_MKL_PARDISO" 393 PetscErrorCode MatFactorCreateSchurComplement_MKL_PARDISO(Mat F,Mat* S) 394 { 395 Mat St; 396 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 397 PetscScalar *array; 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 402 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 403 404 ierr = MatCreate(PetscObjectComm((PetscObject)F),&St);CHKERRQ(ierr); 405 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mpardiso->schur_size,mpardiso->schur_size);CHKERRQ(ierr); 406 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 407 ierr = MatSetUp(St);CHKERRQ(ierr); 408 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 409 ierr = PetscMemcpy(array,mpardiso->schur,mpardiso->schur_size*mpardiso->schur_size*sizeof(PetscScalar));CHKERRQ(ierr); 410 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 411 *S = St; 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNCT__ 416 #define __FUNCT__ "MatFactorGetSchurComplement_MKL_PARDISO" 417 PetscErrorCode MatFactorGetSchurComplement_MKL_PARDISO(Mat F,Mat* S) 418 { 419 Mat St; 420 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 421 PetscErrorCode ierr; 422 423 PetscFunctionBegin; 424 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 425 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 426 427 ierr = MatCreateSeqDense(PetscObjectComm((PetscObject)F),mpardiso->schur_size,mpardiso->schur_size,mpardiso->schur,&St);CHKERRQ(ierr); 428 *S = St; 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatFactorInvertSchurComplement_MKL_PARDISO" 434 PetscErrorCode MatFactorInvertSchurComplement_MKL_PARDISO(Mat F) 435 { 436 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 437 PetscErrorCode ierr; 438 439 PetscFunctionBegin; 440 if (!mpardiso->iparm[36-1]) { /* do nothing */ 441 PetscFunctionReturn(0); 442 } 443 if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 444 ierr = MatMKLPardisoInvertSchur_Private(mpardiso);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatFactorSolveSchurComplement_MKL_PARDISO" 450 PetscErrorCode MatFactorSolveSchurComplement_MKL_PARDISO(Mat F, Vec rhs, Vec sol) 451 { 452 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 453 PetscScalar *asol,*arhs; 454 PetscErrorCode ierr; 455 456 PetscFunctionBegin; 457 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 458 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 459 460 mpardiso->nrhs = 1; 461 ierr = VecGetArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 462 ierr = VecGetArray(sol,&asol);CHKERRQ(ierr); 463 ierr = MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);CHKERRQ(ierr); 464 ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 465 ierr = VecRestoreArray(sol,&asol);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatFactorSolveSchurComplementTranspose_MKL_PARDISO" 471 PetscErrorCode MatFactorSolveSchurComplementTranspose_MKL_PARDISO(Mat F, Vec rhs, Vec sol) 472 { 473 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 474 PetscScalar *asol,*arhs; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 479 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 480 481 mpardiso->nrhs = 1; 482 ierr = VecGetArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 483 ierr = VecGetArray(sol,&asol);CHKERRQ(ierr); 484 mpardiso->iparm[12 - 1] = 2; 485 ierr = MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);CHKERRQ(ierr); 486 mpardiso->iparm[12 - 1] = 0; 487 ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 488 ierr = VecRestoreArray(sol,&asol);CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatDestroy_MKL_PARDISO" 494 PetscErrorCode MatDestroy_MKL_PARDISO(Mat A) 495 { 496 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr; 497 PetscErrorCode ierr; 498 499 PetscFunctionBegin; 500 if (mat_mkl_pardiso->CleanUp) { 501 mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 502 503 MKL_PARDISO (mat_mkl_pardiso->pt, 504 &mat_mkl_pardiso->maxfct, 505 &mat_mkl_pardiso->mnum, 506 &mat_mkl_pardiso->mtype, 507 &mat_mkl_pardiso->phase, 508 &mat_mkl_pardiso->n, 509 NULL, 510 NULL, 511 NULL, 512 mat_mkl_pardiso->perm, 513 &mat_mkl_pardiso->nrhs, 514 mat_mkl_pardiso->iparm, 515 &mat_mkl_pardiso->msglvl, 516 NULL, 517 NULL, 518 &mat_mkl_pardiso->err); 519 } 520 ierr = PetscFree(mat_mkl_pardiso->perm);CHKERRQ(ierr); 521 ierr = PetscFree2(mat_mkl_pardiso->schur,mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 522 ierr = PetscFree(mat_mkl_pardiso->schur_idxs);CHKERRQ(ierr); 523 ierr = PetscFree(mat_mkl_pardiso->schur_pivots);CHKERRQ(ierr); 524 if (mat_mkl_pardiso->freeaij) { 525 ierr = PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);CHKERRQ(ierr); 526 ierr = PetscFree(mat_mkl_pardiso->a);CHKERRQ(ierr); 527 } 528 if (mat_mkl_pardiso->Destroy) { 529 ierr = (mat_mkl_pardiso->Destroy)(A);CHKERRQ(ierr); 530 } 531 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 532 533 /* clear composed functions */ 534 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 535 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 536 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 537 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSchurComplement_C",NULL);CHKERRQ(ierr); 538 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorInvertSchurComplement_C",NULL);CHKERRQ(ierr); 539 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplement_C",NULL);CHKERRQ(ierr); 540 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplementTranspose_C",NULL);CHKERRQ(ierr); 541 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_PardisoSetCntl_C",NULL);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMKLPardisoScatterSchur_Private" 547 static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce) 548 { 549 PetscFunctionBegin; 550 551 if (reduce) { /* data given for the whole matrix */ 552 PetscInt i,m=0,p=0; 553 for (i=0;i<mpardiso->nrhs;i++) { 554 PetscInt j; 555 for (j=0;j<mpardiso->schur_size;j++) { 556 schur[p+j] = whole[m+mpardiso->schur_idxs[j]]; 557 } 558 m += mpardiso->n; 559 p += mpardiso->schur_size; 560 } 561 } else { /* from Schur to whole */ 562 PetscInt i,m=0,p=0; 563 for (i=0;i<mpardiso->nrhs;i++) { 564 PetscInt j; 565 for (j=0;j<mpardiso->schur_size;j++) { 566 whole[m+mpardiso->schur_idxs[j]] = schur[p+j]; 567 } 568 m += mpardiso->n; 569 p += mpardiso->schur_size; 570 } 571 } 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatSolve_MKL_PARDISO" 577 PetscErrorCode MatSolve_MKL_PARDISO(Mat A,Vec b,Vec x) 578 { 579 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->spptr; 580 PetscErrorCode ierr; 581 PetscScalar *xarray; 582 const PetscScalar *barray; 583 584 PetscFunctionBegin; 585 mat_mkl_pardiso->nrhs = 1; 586 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 587 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 588 589 if (!mat_mkl_pardiso->schur) { 590 mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 591 } else { 592 mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 593 } 594 mat_mkl_pardiso->iparm[6-1] = 0; 595 MKL_PARDISO (mat_mkl_pardiso->pt, 596 &mat_mkl_pardiso->maxfct, 597 &mat_mkl_pardiso->mnum, 598 &mat_mkl_pardiso->mtype, 599 &mat_mkl_pardiso->phase, 600 &mat_mkl_pardiso->n, 601 mat_mkl_pardiso->a, 602 mat_mkl_pardiso->ia, 603 mat_mkl_pardiso->ja, 604 mat_mkl_pardiso->perm, 605 &mat_mkl_pardiso->nrhs, 606 mat_mkl_pardiso->iparm, 607 &mat_mkl_pardiso->msglvl, 608 (void*)barray, 609 (void*)xarray, 610 &mat_mkl_pardiso->err); 611 612 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 613 614 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 615 PetscInt shift = mat_mkl_pardiso->schur_size; 616 617 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 618 if (!mat_mkl_pardiso->schur_inverted) { 619 shift = 0; 620 } 621 622 /* solve Schur complement */ 623 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);CHKERRQ(ierr); 624 ierr = MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);CHKERRQ(ierr); 625 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);CHKERRQ(ierr); 626 627 /* expansion phase */ 628 mat_mkl_pardiso->iparm[6-1] = 1; 629 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 630 MKL_PARDISO (mat_mkl_pardiso->pt, 631 &mat_mkl_pardiso->maxfct, 632 &mat_mkl_pardiso->mnum, 633 &mat_mkl_pardiso->mtype, 634 &mat_mkl_pardiso->phase, 635 &mat_mkl_pardiso->n, 636 mat_mkl_pardiso->a, 637 mat_mkl_pardiso->ia, 638 mat_mkl_pardiso->ja, 639 mat_mkl_pardiso->perm, 640 &mat_mkl_pardiso->nrhs, 641 mat_mkl_pardiso->iparm, 642 &mat_mkl_pardiso->msglvl, 643 (void*)xarray, 644 (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 645 &mat_mkl_pardiso->err); 646 647 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 648 mat_mkl_pardiso->iparm[6-1] = 0; 649 } 650 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 651 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 652 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatSolveTranspose_MKL_PARDISO" 658 PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A,Vec b,Vec x) 659 { 660 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr; 661 PetscErrorCode ierr; 662 663 PetscFunctionBegin; 664 #if defined(PETSC_USE_COMPLEX) 665 mat_mkl_pardiso->iparm[12 - 1] = 1; 666 #else 667 mat_mkl_pardiso->iparm[12 - 1] = 2; 668 #endif 669 ierr = MatSolve_MKL_PARDISO(A,b,x);CHKERRQ(ierr); 670 mat_mkl_pardiso->iparm[12 - 1] = 0; 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatMatSolve_MKL_PARDISO" 676 PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A,Mat B,Mat X) 677 { 678 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->spptr; 679 PetscErrorCode ierr; 680 PetscScalar *barray, *xarray; 681 PetscBool flg; 682 683 PetscFunctionBegin; 684 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 685 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 686 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 687 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 688 689 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_pardiso->nrhs);CHKERRQ(ierr); 690 691 if(mat_mkl_pardiso->nrhs > 0){ 692 ierr = MatDenseGetArray(B,&barray); 693 ierr = MatDenseGetArray(X,&xarray); 694 695 if (!mat_mkl_pardiso->schur) { 696 mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 697 } else { 698 mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 699 } 700 mat_mkl_pardiso->iparm[6-1] = 0; 701 MKL_PARDISO (mat_mkl_pardiso->pt, 702 &mat_mkl_pardiso->maxfct, 703 &mat_mkl_pardiso->mnum, 704 &mat_mkl_pardiso->mtype, 705 &mat_mkl_pardiso->phase, 706 &mat_mkl_pardiso->n, 707 mat_mkl_pardiso->a, 708 mat_mkl_pardiso->ia, 709 mat_mkl_pardiso->ja, 710 mat_mkl_pardiso->perm, 711 &mat_mkl_pardiso->nrhs, 712 mat_mkl_pardiso->iparm, 713 &mat_mkl_pardiso->msglvl, 714 (void*)barray, 715 (void*)xarray, 716 &mat_mkl_pardiso->err); 717 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 718 719 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 720 PetscScalar *o_schur_work = NULL; 721 PetscInt shift = mat_mkl_pardiso->schur_size*mat_mkl_pardiso->nrhs,scale; 722 PetscInt mem = mat_mkl_pardiso->n*mat_mkl_pardiso->nrhs; 723 724 /* allocate extra memory if it is needed */ 725 scale = 1; 726 if (mat_mkl_pardiso->schur_inverted) { 727 scale = 2; 728 } 729 mem *= scale; 730 if (mem > mat_mkl_pardiso->schur_work_size) { 731 o_schur_work = mat_mkl_pardiso->schur_work; 732 ierr = PetscMalloc1(mem,&mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 733 } 734 735 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 736 if (!mat_mkl_pardiso->schur_inverted) { 737 shift = 0; 738 } 739 740 /* solve Schur complement */ 741 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);CHKERRQ(ierr); 742 ierr = MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);CHKERRQ(ierr); 743 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);CHKERRQ(ierr); 744 745 /* expansion phase */ 746 mat_mkl_pardiso->iparm[6-1] = 1; 747 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 748 MKL_PARDISO (mat_mkl_pardiso->pt, 749 &mat_mkl_pardiso->maxfct, 750 &mat_mkl_pardiso->mnum, 751 &mat_mkl_pardiso->mtype, 752 &mat_mkl_pardiso->phase, 753 &mat_mkl_pardiso->n, 754 mat_mkl_pardiso->a, 755 mat_mkl_pardiso->ia, 756 mat_mkl_pardiso->ja, 757 mat_mkl_pardiso->perm, 758 &mat_mkl_pardiso->nrhs, 759 mat_mkl_pardiso->iparm, 760 &mat_mkl_pardiso->msglvl, 761 (void*)xarray, 762 (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 763 &mat_mkl_pardiso->err); 764 if (o_schur_work) { /* restore original schur_work (minimal size) */ 765 ierr = PetscFree(mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 766 mat_mkl_pardiso->schur_work = o_schur_work; 767 } 768 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 769 mat_mkl_pardiso->iparm[6-1] = 0; 770 } 771 } 772 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatFactorNumeric_MKL_PARDISO" 778 PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F,Mat A,const MatFactorInfo *info) 779 { 780 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(F)->spptr; 781 PetscErrorCode ierr; 782 783 PetscFunctionBegin; 784 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 785 ierr = (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_REUSE_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,&mat_mkl_pardiso->a);CHKERRQ(ierr); 786 787 mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION; 788 MKL_PARDISO (mat_mkl_pardiso->pt, 789 &mat_mkl_pardiso->maxfct, 790 &mat_mkl_pardiso->mnum, 791 &mat_mkl_pardiso->mtype, 792 &mat_mkl_pardiso->phase, 793 &mat_mkl_pardiso->n, 794 mat_mkl_pardiso->a, 795 mat_mkl_pardiso->ia, 796 mat_mkl_pardiso->ja, 797 mat_mkl_pardiso->perm, 798 &mat_mkl_pardiso->nrhs, 799 mat_mkl_pardiso->iparm, 800 &mat_mkl_pardiso->msglvl, 801 NULL, 802 (void*)mat_mkl_pardiso->schur, 803 &mat_mkl_pardiso->err); 804 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 805 806 if (mat_mkl_pardiso->schur) { /* schur output from pardiso is in row major format */ 807 PetscInt j,k,n=mat_mkl_pardiso->schur_size; 808 for (j=0; j<n; j++) { 809 for (k=0; k<j; k++) { 810 PetscScalar tmp = mat_mkl_pardiso->schur[j + k*n]; 811 mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n]; 812 mat_mkl_pardiso->schur[k + j*n] = tmp; 813 } 814 } 815 } 816 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 817 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 818 mat_mkl_pardiso->schur_factored = PETSC_FALSE; 819 mat_mkl_pardiso->schur_inverted = PETSC_FALSE; 820 mat_mkl_pardiso->schur_solver_type = 0; 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "PetscSetMKL_PARDISOFromOptions" 826 PetscErrorCode PetscSetMKL_PARDISOFromOptions(Mat F, Mat A) 827 { 828 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->spptr; 829 PetscErrorCode ierr; 830 PetscInt icntl; 831 PetscBool flg; 832 int pt[IPARM_SIZE], threads = 1; 833 834 PetscFunctionBegin; 835 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_PARDISO Options","Mat");CHKERRQ(ierr); 836 ierr = PetscOptionsInt("-mat_mkl_pardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr); 837 if (flg) mkl_set_num_threads(threads); 838 839 ierr = PetscOptionsInt("-mat_mkl_pardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_pardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 840 if (flg) mat_mkl_pardiso->maxfct = icntl; 841 842 ierr = PetscOptionsInt("-mat_mkl_pardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_pardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 843 if (flg) mat_mkl_pardiso->mnum = icntl; 844 845 ierr = PetscOptionsInt("-mat_mkl_pardiso_68","Message level information","None",mat_mkl_pardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 846 if (flg) mat_mkl_pardiso->msglvl = icntl; 847 848 ierr = PetscOptionsInt("-mat_mkl_pardiso_69","Defines the matrix type","None",mat_mkl_pardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 849 if(flg){ 850 mat_mkl_pardiso->mtype = icntl; 851 MKL_PARDISO_INIT(&pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 852 #if defined(PETSC_USE_REAL_SINGLE) 853 mat_mkl_pardiso->iparm[27] = 1; 854 #else 855 mat_mkl_pardiso->iparm[27] = 0; 856 #endif 857 mat_mkl_pardiso->iparm[34] = 1; 858 } 859 ierr = PetscOptionsInt("-mat_mkl_pardiso_1","Use default values","None",mat_mkl_pardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 860 861 if(flg && icntl != 0){ 862 ierr = PetscOptionsInt("-mat_mkl_pardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_pardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 863 if (flg) mat_mkl_pardiso->iparm[1] = icntl; 864 865 ierr = PetscOptionsInt("-mat_mkl_pardiso_4","Preconditioned CGS/CG","None",mat_mkl_pardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 866 if (flg) mat_mkl_pardiso->iparm[3] = icntl; 867 868 ierr = PetscOptionsInt("-mat_mkl_pardiso_5","User permutation","None",mat_mkl_pardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 869 if (flg) mat_mkl_pardiso->iparm[4] = icntl; 870 871 ierr = PetscOptionsInt("-mat_mkl_pardiso_6","Write solution on x","None",mat_mkl_pardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 872 if (flg) mat_mkl_pardiso->iparm[5] = icntl; 873 874 ierr = PetscOptionsInt("-mat_mkl_pardiso_8","Iterative refinement step","None",mat_mkl_pardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 875 if (flg) mat_mkl_pardiso->iparm[7] = icntl; 876 877 ierr = PetscOptionsInt("-mat_mkl_pardiso_10","Pivoting perturbation","None",mat_mkl_pardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 878 if (flg) mat_mkl_pardiso->iparm[9] = icntl; 879 880 ierr = PetscOptionsInt("-mat_mkl_pardiso_11","Scaling vectors","None",mat_mkl_pardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 881 if (flg) mat_mkl_pardiso->iparm[10] = icntl; 882 883 ierr = PetscOptionsInt("-mat_mkl_pardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_pardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 884 if (flg) mat_mkl_pardiso->iparm[11] = icntl; 885 886 ierr = PetscOptionsInt("-mat_mkl_pardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_pardiso->iparm[12],&icntl,&flg);CHKERRQ(ierr); 887 if (flg) mat_mkl_pardiso->iparm[12] = icntl; 888 889 ierr = PetscOptionsInt("-mat_mkl_pardiso_18","Numbers of non-zero elements","None",mat_mkl_pardiso->iparm[17],&icntl,&flg);CHKERRQ(ierr); 890 if (flg) mat_mkl_pardiso->iparm[17] = icntl; 891 892 ierr = PetscOptionsInt("-mat_mkl_pardiso_19","Report number of floating point operations","None",mat_mkl_pardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 893 if (flg) mat_mkl_pardiso->iparm[18] = icntl; 894 895 ierr = PetscOptionsInt("-mat_mkl_pardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_pardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 896 if (flg) mat_mkl_pardiso->iparm[20] = icntl; 897 898 ierr = PetscOptionsInt("-mat_mkl_pardiso_24","Parallel factorization control","None",mat_mkl_pardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 899 if (flg) mat_mkl_pardiso->iparm[23] = icntl; 900 901 ierr = PetscOptionsInt("-mat_mkl_pardiso_25","Parallel forward/backward solve control","None",mat_mkl_pardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 902 if (flg) mat_mkl_pardiso->iparm[24] = icntl; 903 904 ierr = PetscOptionsInt("-mat_mkl_pardiso_27","Matrix checker","None",mat_mkl_pardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 905 if (flg) mat_mkl_pardiso->iparm[26] = icntl; 906 907 ierr = PetscOptionsInt("-mat_mkl_pardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_pardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 908 if (flg) mat_mkl_pardiso->iparm[30] = icntl; 909 910 ierr = PetscOptionsInt("-mat_mkl_pardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_pardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 911 if (flg) mat_mkl_pardiso->iparm[33] = icntl; 912 913 ierr = PetscOptionsInt("-mat_mkl_pardiso_60","Intel MKL_PARDISO mode","None",mat_mkl_pardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 914 if (flg) mat_mkl_pardiso->iparm[59] = icntl; 915 } 916 PetscOptionsEnd(); 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatFactorMKL_PARDISOInitialize_Private" 922 PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso) 923 { 924 PetscErrorCode ierr; 925 PetscInt i; 926 927 PetscFunctionBegin; 928 for ( i = 0; i < IPARM_SIZE; i++ ){ 929 mat_mkl_pardiso->iparm[i] = 0; 930 } 931 for ( i = 0; i < IPARM_SIZE; i++ ){ 932 mat_mkl_pardiso->pt[i] = 0; 933 } 934 /* Default options for both sym and unsym */ 935 mat_mkl_pardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 936 mat_mkl_pardiso->iparm[ 1] = 2; /* Metis reordering */ 937 mat_mkl_pardiso->iparm[ 5] = 0; /* Write solution into x */ 938 mat_mkl_pardiso->iparm[ 7] = 0; /* Max number of iterative refinement steps */ 939 mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 940 mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 941 #if 0 942 mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control*/ 943 #endif 944 mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */ 945 mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on master */ 946 947 mat_mkl_pardiso->CleanUp = PETSC_FALSE; 948 mat_mkl_pardiso->maxfct = 1; /* Maximum number of numerical factorizations. */ 949 mat_mkl_pardiso->mnum = 1; /* Which factorization to use. */ 950 mat_mkl_pardiso->msglvl = 0; /* 0: do not print 1: Print statistical information in file */ 951 mat_mkl_pardiso->phase = -1; 952 mat_mkl_pardiso->err = 0; 953 954 mat_mkl_pardiso->n = A->rmap->N; 955 mat_mkl_pardiso->nrhs = 1; 956 mat_mkl_pardiso->err = 0; 957 mat_mkl_pardiso->phase = -1; 958 959 if(ftype == MAT_FACTOR_LU){ 960 mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 961 mat_mkl_pardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 962 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 963 964 } else { 965 mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 966 mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */ 967 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 968 /* mat_mkl_pardiso->iparm[20] = 1; */ /* Apply 1x1 and 2x2 Bunch-Kaufman pivoting during the factorization process */ 969 #if defined(PETSC_USE_DEBUG) 970 mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */ 971 #endif 972 } 973 ierr = PetscMalloc1(A->rmap->N*sizeof(INT_TYPE), &mat_mkl_pardiso->perm);CHKERRQ(ierr); 974 for(i = 0; i < A->rmap->N; i++){ 975 mat_mkl_pardiso->perm[i] = 0; 976 } 977 mat_mkl_pardiso->schur_size = 0; 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatFactorSymbolic_AIJMKL_PARDISO_Private" 983 PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F,Mat A,const MatFactorInfo *info) 984 { 985 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->spptr; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 990 ierr = PetscSetMKL_PARDISOFromOptions(F,A);CHKERRQ(ierr); 991 992 /* throw away any previously computed structure */ 993 if (mat_mkl_pardiso->freeaij) { 994 ierr = PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);CHKERRQ(ierr); 995 ierr = PetscFree(mat_mkl_pardiso->a);CHKERRQ(ierr); 996 } 997 ierr = (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_INITIAL_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,&mat_mkl_pardiso->a);CHKERRQ(ierr); 998 mat_mkl_pardiso->n = A->rmap->N; 999 1000 mat_mkl_pardiso->phase = JOB_ANALYSIS; 1001 1002 MKL_PARDISO (mat_mkl_pardiso->pt, 1003 &mat_mkl_pardiso->maxfct, 1004 &mat_mkl_pardiso->mnum, 1005 &mat_mkl_pardiso->mtype, 1006 &mat_mkl_pardiso->phase, 1007 &mat_mkl_pardiso->n, 1008 mat_mkl_pardiso->a, 1009 mat_mkl_pardiso->ia, 1010 mat_mkl_pardiso->ja, 1011 mat_mkl_pardiso->perm, 1012 &mat_mkl_pardiso->nrhs, 1013 mat_mkl_pardiso->iparm, 1014 &mat_mkl_pardiso->msglvl, 1015 NULL, 1016 NULL, 1017 &mat_mkl_pardiso->err); 1018 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d\n. Please check manual",mat_mkl_pardiso->err); 1019 1020 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 1021 1022 if (F->factortype == MAT_FACTOR_LU) { 1023 F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO; 1024 } else { 1025 F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO; 1026 } 1027 F->ops->solve = MatSolve_MKL_PARDISO; 1028 F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO; 1029 F->ops->matsolve = MatMatSolve_MKL_PARDISO; 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_PARDISO" 1035 PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1036 { 1037 PetscErrorCode ierr; 1038 1039 PetscFunctionBegin; 1040 ierr = MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatCholeskyFactorSymbolic_AIJMKL_PARDISO" 1046 PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,const MatFactorInfo *info) 1047 { 1048 PetscErrorCode ierr; 1049 1050 PetscFunctionBegin; 1051 ierr = MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatView_MKL_PARDISO" 1057 PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer) 1058 { 1059 PetscErrorCode ierr; 1060 PetscBool iascii; 1061 PetscViewerFormat format; 1062 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr; 1063 PetscInt i; 1064 1065 PetscFunctionBegin; 1066 if (A->ops->solve != MatSolve_MKL_PARDISO) PetscFunctionReturn(0); 1067 1068 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1069 if (iascii) { 1070 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1071 if (format == PETSC_VIEWER_ASCII_INFO) { 1072 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO run parameters:\n");CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO phase: %d \n",mat_mkl_pardiso->phase);CHKERRQ(ierr); 1074 for(i = 1; i <= 64; i++){ 1075 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO iparm[%d]: %d \n",i, mat_mkl_pardiso->iparm[i - 1]);CHKERRQ(ierr); 1076 } 1077 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO maxfct: %d \n", mat_mkl_pardiso->maxfct);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mnum: %d \n", mat_mkl_pardiso->mnum);CHKERRQ(ierr); 1079 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mtype: %d \n", mat_mkl_pardiso->mtype);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO n: %d \n", mat_mkl_pardiso->n);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO nrhs: %d \n", mat_mkl_pardiso->nrhs);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO msglvl: %d \n", mat_mkl_pardiso->msglvl);CHKERRQ(ierr); 1083 } 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatGetInfo_MKL_PARDISO" 1091 PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info) 1092 { 1093 Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)A->spptr; 1094 1095 PetscFunctionBegin; 1096 info->block_size = 1.0; 1097 info->nz_allocated = mat_mkl_pardiso->nz + 0.0; 1098 info->nz_unneeded = 0.0; 1099 info->assemblies = 0.0; 1100 info->mallocs = 0.0; 1101 info->memory = 0.0; 1102 info->fill_ratio_given = 0; 1103 info->fill_ratio_needed = 0; 1104 info->factor_mallocs = 0; 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMkl_PardisoSetCntl_MKL_PARDISO" 1110 PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F,PetscInt icntl,PetscInt ival) 1111 { 1112 Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)F->spptr; 1113 1114 PetscFunctionBegin; 1115 if(icntl <= 64){ 1116 mat_mkl_pardiso->iparm[icntl - 1] = ival; 1117 } else { 1118 if(icntl == 65) 1119 mkl_set_num_threads((int)ival); 1120 else if(icntl == 66) 1121 mat_mkl_pardiso->maxfct = ival; 1122 else if(icntl == 67) 1123 mat_mkl_pardiso->mnum = ival; 1124 else if(icntl == 68) 1125 mat_mkl_pardiso->msglvl = ival; 1126 else if(icntl == 69){ 1127 int pt[IPARM_SIZE]; 1128 mat_mkl_pardiso->mtype = ival; 1129 MKL_PARDISO_INIT(&pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 1130 #if defined(PETSC_USE_REAL_SINGLE) 1131 mat_mkl_pardiso->iparm[27] = 1; 1132 #else 1133 mat_mkl_pardiso->iparm[27] = 0; 1134 #endif 1135 mat_mkl_pardiso->iparm[34] = 1; 1136 } 1137 } 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatMkl_PardisoSetCntl" 1143 /*@ 1144 MatMkl_PardisoSetCntl - Set Mkl_Pardiso parameters 1145 1146 Logically Collective on Mat 1147 1148 Input Parameters: 1149 + F - the factored matrix obtained by calling MatGetFactor() 1150 . icntl - index of Mkl_Pardiso parameter 1151 - ival - value of Mkl_Pardiso parameter 1152 1153 Options Database: 1154 . -mat_mkl_pardiso_<icntl> <ival> 1155 1156 Level: beginner 1157 1158 References: Mkl_Pardiso Users' Guide 1159 1160 .seealso: MatGetFactor() 1161 @*/ 1162 PetscErrorCode MatMkl_PardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 1163 { 1164 PetscErrorCode ierr; 1165 1166 PetscFunctionBegin; 1167 ierr = PetscTryMethod(F,"MatMkl_PardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 /*MC 1172 MATSOLVERMKL_PARDISO - A matrix type providing direct solvers (LU) for 1173 sequential matrices via the external package MKL_PARDISO. 1174 1175 Works with MATSEQAIJ matrices 1176 1177 Use -pc_type lu -pc_factor_mat_solver_package mkl_pardiso to us this direct solver 1178 1179 Options Database Keys: 1180 + -mat_mkl_pardiso_65 - Number of threads to use 1181 . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time 1182 . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase 1183 . -mat_mkl_pardiso_68 - Message level information 1184 . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type 1185 . -mat_mkl_pardiso_1 - Use default values 1186 . -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix 1187 . -mat_mkl_pardiso_4 - Preconditioned CGS/CG 1188 . -mat_mkl_pardiso_5 - User permutation 1189 . -mat_mkl_pardiso_6 - Write solution on x 1190 . -mat_mkl_pardiso_8 - Iterative refinement step 1191 . -mat_mkl_pardiso_10 - Pivoting perturbation 1192 . -mat_mkl_pardiso_11 - Scaling vectors 1193 . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A 1194 . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching 1195 . -mat_mkl_pardiso_18 - Numbers of non-zero elements 1196 . -mat_mkl_pardiso_19 - Report number of floating point operations 1197 . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices 1198 . -mat_mkl_pardiso_24 - Parallel factorization control 1199 . -mat_mkl_pardiso_25 - Parallel forward/backward solve control 1200 . -mat_mkl_pardiso_27 - Matrix checker 1201 . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors 1202 . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode 1203 - -mat_mkl_pardiso_60 - Intel MKL_PARDISO mode 1204 1205 Level: beginner 1206 1207 For more information please check mkl_pardiso manual 1208 1209 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage 1210 1211 M*/ 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_pardiso" 1214 static PetscErrorCode MatFactorGetSolverPackage_mkl_pardiso(Mat A, const MatSolverPackage *type) 1215 { 1216 PetscFunctionBegin; 1217 *type = MATSOLVERMKL_PARDISO; 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatGetFactor_aij_mkl_pardiso" 1223 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A,MatFactorType ftype,Mat *F) 1224 { 1225 Mat B; 1226 PetscErrorCode ierr; 1227 Mat_MKL_PARDISO *mat_mkl_pardiso; 1228 PetscBool isSeqAIJ,isSeqBAIJ,isSeqSBAIJ; 1229 1230 PetscFunctionBegin; 1231 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 1232 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 1233 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 1234 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1235 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1236 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1237 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 1238 ierr = MatSeqBAIJSetPreallocation(B,A->rmap->bs,0,NULL);CHKERRQ(ierr); 1239 ierr = MatSeqSBAIJSetPreallocation(B,A->rmap->bs,0,NULL);CHKERRQ(ierr); 1240 1241 ierr = PetscNewLog(B,&mat_mkl_pardiso);CHKERRQ(ierr); 1242 B->spptr = mat_mkl_pardiso; 1243 1244 ierr = MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso);CHKERRQ(ierr); 1245 if (ftype == MAT_FACTOR_LU) { 1246 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO; 1247 B->factortype = MAT_FACTOR_LU; 1248 mat_mkl_pardiso->needsym = PETSC_FALSE; 1249 if (isSeqAIJ) { 1250 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1251 } else if (isSeqBAIJ) { 1252 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1253 } else if (isSeqSBAIJ) { 1254 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead"); 1255 } else { 1256 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU with %s format",((PetscObject)A)->type_name); 1257 } 1258 #if defined(PETSC_USE_COMPLEX) 1259 mat_mkl_pardiso->schur_solver_type = 0; /* use a general solver for the moment */ 1260 mat_mkl_pardiso->mtype = 13; 1261 #else 1262 if (A->structurally_symmetric) { 1263 mat_mkl_pardiso->mtype = 1; 1264 } else { 1265 mat_mkl_pardiso->mtype = 11; 1266 } 1267 #endif 1268 mat_mkl_pardiso->schur_solver_type = 0; 1269 } else { 1270 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO; 1271 B->factortype = MAT_FACTOR_CHOLESKY; 1272 if (isSeqAIJ) { 1273 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1274 } else if (isSeqBAIJ) { 1275 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1276 } else if (isSeqSBAIJ) { 1277 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij; 1278 } else { 1279 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with %s format",((PetscObject)A)->type_name); 1280 } 1281 mat_mkl_pardiso->needsym = PETSC_TRUE; 1282 #if defined(PETSC_USE_COMPLEX) 1283 mat_mkl_pardiso->schur_solver_type = 0; /* use a general solver for the moment */ 1284 mat_mkl_pardiso->mtype = 13; 1285 #else 1286 if (A->spd_set && A->spd) { 1287 mat_mkl_pardiso->schur_solver_type = 2; 1288 mat_mkl_pardiso->mtype = 2; 1289 } else { 1290 mat_mkl_pardiso->schur_solver_type = 1; 1291 mat_mkl_pardiso->mtype = -2; 1292 } 1293 #endif 1294 } 1295 mat_mkl_pardiso->Destroy = B->ops->destroy; 1296 B->ops->destroy = MatDestroy_MKL_PARDISO; 1297 B->ops->view = MatView_MKL_PARDISO; 1298 B->factortype = ftype; 1299 B->ops->getinfo = MatGetInfo_MKL_PARDISO; 1300 B->assembled = PETSC_TRUE; 1301 1302 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_pardiso);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MKL_PARDISO);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSchurComplement_C",MatFactorGetSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorInvertSchurComplement_C",MatFactorInvertSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplement_C",MatFactorSolveSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplementTranspose_C",MatFactorSolveSchurComplementTranspose_MKL_PARDISO);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_PardisoSetCntl_C",MatMkl_PardisoSetCntl_MKL_PARDISO);CHKERRQ(ierr); 1310 1311 *F = B; 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatSolverPackageRegister_MKL_Pardiso" 1317 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_Pardiso(void) 1318 { 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1323 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1324 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328