1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/sbaij/seq/sbaij.h> 7 #include <../src/mat/impls/dense/seq/dense.h> 8 #include <petscblaslapack.h> 9 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <math.h> 13 #include <mkl_pardiso.h> 14 15 PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int); 16 17 /* 18 * Possible mkl_pardiso phases that controls the execution of the solver. 19 * For more information check mkl_pardiso manual. 20 */ 21 #define JOB_ANALYSIS 11 22 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 23 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 24 #define JOB_NUMERICAL_FACTORIZATION 22 25 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 26 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 27 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 28 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 29 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 30 #define JOB_RELEASE_OF_LU_MEMORY 0 31 #define JOB_RELEASE_OF_ALL_MEMORY -1 32 33 #define IPARM_SIZE 64 34 35 #if defined(PETSC_USE_64BIT_INDICES) 36 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 37 /* sizeof(MKL_INT) == sizeof(long long int) if ilp64*/ 38 #define INT_TYPE long long int 39 #define MKL_PARDISO pardiso 40 #define MKL_PARDISO_INIT pardisoinit 41 #else 42 #define INT_TYPE long long int 43 #define MKL_PARDISO pardiso_64 44 #define MKL_PARDISO_INIT pardiso_64init 45 #endif 46 #else 47 #define INT_TYPE int 48 #define MKL_PARDISO pardiso 49 #define MKL_PARDISO_INIT pardisoinit 50 #endif 51 52 53 /* 54 * Internal data structure. 55 * For more information check mkl_pardiso manual. 56 */ 57 typedef struct { 58 59 /* Configuration vector*/ 60 INT_TYPE iparm[IPARM_SIZE]; 61 62 /* 63 * Internal mkl_pardiso memory location. 64 * After the first call to mkl_pardiso do not modify pt, as that could cause a serious memory leak. 65 */ 66 void *pt[IPARM_SIZE]; 67 68 /* Basic mkl_pardiso info*/ 69 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 70 71 /* Matrix structure*/ 72 void *a; 73 INT_TYPE *ia, *ja; 74 75 /* Number of non-zero elements*/ 76 INT_TYPE nz; 77 78 /* Row permutaton vector*/ 79 INT_TYPE *perm; 80 81 /* Define if matrix preserves sparse structure.*/ 82 MatStructure matstruc; 83 84 PetscBool needsym; 85 PetscBool freeaij; 86 87 /* Schur complement */ 88 PetscScalar *schur; 89 PetscInt schur_size; 90 PetscInt *schur_idxs; 91 PetscScalar *schur_work; 92 PetscBLASInt schur_work_size; 93 PetscInt schur_solver_type; 94 PetscInt *schur_pivots; 95 PetscBool schur_factored; 96 PetscBool schur_inverted; 97 PetscBool solve_interior; 98 99 /* True if mkl_pardiso function have been used.*/ 100 PetscBool CleanUp; 101 102 /* Conversion to a format suitable for MKL */ 103 PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool*, INT_TYPE*, INT_TYPE**, INT_TYPE**, PetscScalar**); 104 PetscErrorCode (*Destroy)(Mat); 105 } Mat_MKL_PARDISO; 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatMKLPardiso_Convert_seqsbaij" 109 PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 110 { 111 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 112 PetscInt bs = A->rmap->bs; 113 114 PetscFunctionBegin; 115 if (!sym) { 116 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"This should not happen"); 117 } 118 if (bs == 1) { /* already in the correct format */ 119 *v = aa->a; 120 *r = aa->i; 121 *c = aa->j; 122 *nnz = aa->nz; 123 *free = PETSC_FALSE; 124 } else { 125 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqSBAIJ to MKL Pardiso format still need to be implemented"); 126 } 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatMKLPardiso_Convert_seqbaij" 132 PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 133 { 134 PetscFunctionBegin; 135 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqBAIJ to MKL Pardiso format still need to be implemented"); 136 PetscFunctionReturn(0); 137 } 138 139 #undef __FUNCT__ 140 #define __FUNCT__ "MatMKLPardiso_Convert_seqaij" 141 PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v) 142 { 143 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 if (!sym) { /* already in the correct format */ 148 *v = aa->a; 149 *r = aa->i; 150 *c = aa->j; 151 *nnz = aa->nz; 152 *free = PETSC_FALSE; 153 PetscFunctionReturn(0); 154 } 155 /* need to get the triangular part */ 156 if (reuse == MAT_INITIAL_MATRIX) { 157 PetscScalar *vals,*vv; 158 PetscInt *row,*col,*jj; 159 PetscInt m = A->rmap->n,nz,i; 160 161 nz = 0; 162 for (i=0; i<m; i++) { 163 nz += aa->i[i+1] - aa->diag[i]; 164 } 165 ierr = PetscMalloc2(m+1,&row,nz,&col);CHKERRQ(ierr); 166 ierr = PetscMalloc1(nz,&vals);CHKERRQ(ierr); 167 jj = col; 168 vv = vals; 169 170 row[0] = 0; 171 for (i=0; i<m; i++) { 172 PetscInt *aj = aa->j + aa->diag[i]; 173 PetscScalar *av = aa->a + aa->diag[i]; 174 PetscInt rl = aa->i[i+1] - aa->diag[i],j; 175 for (j=0; j<rl; j++) { 176 *jj = *aj; jj++; aj++; 177 *vv = *av; vv++; av++; 178 } 179 row[i+1] = row[i] + rl; 180 } 181 *v = vals; 182 *r = row; 183 *c = col; 184 *nnz = nz; 185 *free = PETSC_TRUE; 186 } else { 187 PetscScalar *vv; 188 PetscInt m = A->rmap->n,i; 189 190 vv = *v; 191 for (i=0; i<m; i++) { 192 PetscScalar *av = aa->a + aa->diag[i]; 193 PetscInt rl = aa->i[i+1] - aa->diag[i],j; 194 for (j=0; j<rl; j++) { 195 *vv = *av; vv++; av++; 196 } 197 } 198 *free = PETSC_TRUE; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm []) 204 { 205 int iparm_copy[IPARM_SIZE], mtype_copy, i; 206 207 mtype_copy = *mtype; 208 pardisoinit(pt, &mtype_copy, iparm_copy); 209 for(i = 0; i < IPARM_SIZE; i++){ 210 iparm[i] = iparm_copy[i]; 211 } 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatMKLPardisoFactorSchur_Private" 216 static PetscErrorCode MatMKLPardisoFactorSchur_Private(Mat_MKL_PARDISO* mpardiso) 217 { 218 PetscBLASInt B_N,B_ierr; 219 PetscScalar *work,val; 220 PetscBLASInt lwork = -1; 221 PetscErrorCode ierr; 222 223 PetscFunctionBegin; 224 if (mpardiso->schur_factored) { 225 PetscFunctionReturn(0); 226 } 227 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 228 switch (mpardiso->schur_solver_type) { 229 case 1: /* hermitian solver */ 230 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 231 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,mpardiso->schur,&B_N,&B_ierr)); 232 ierr = PetscFPTrapPop();CHKERRQ(ierr); 233 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 234 break; 235 case 2: /* symmetric */ 236 if (!mpardiso->schur_pivots) { 237 ierr = PetscMalloc1(B_N,&mpardiso->schur_pivots);CHKERRQ(ierr); 238 } 239 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 240 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&val,&lwork,&B_ierr)); 241 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(val),&lwork);CHKERRQ(ierr); 242 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 243 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,work,&lwork,&B_ierr)); 244 ierr = PetscFree(work);CHKERRQ(ierr); 245 ierr = PetscFPTrapPop();CHKERRQ(ierr); 246 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 247 break; 248 default: /* general */ 249 if (!mpardiso->schur_pivots) { 250 ierr = PetscMalloc1(B_N,&mpardiso->schur_pivots);CHKERRQ(ierr); 251 } 252 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 253 PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&B_ierr)); 254 ierr = PetscFPTrapPop();CHKERRQ(ierr); 255 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 256 break; 257 } 258 mpardiso->schur_factored = PETSC_TRUE; 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatMKLPardisoInvertSchur_Private" 264 static PetscErrorCode MatMKLPardisoInvertSchur_Private(Mat_MKL_PARDISO* mpardiso) 265 { 266 PetscBLASInt B_N,B_ierr; 267 PetscErrorCode ierr; 268 269 PetscFunctionBegin; 270 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 271 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 272 switch (mpardiso->schur_solver_type) { 273 case 1: /* hermitian solver */ 274 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 275 PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,mpardiso->schur,&B_N,&B_ierr)); 276 ierr = PetscFPTrapPop();CHKERRQ(ierr); 277 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 278 break; 279 case 2: /* symmetric */ 280 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 281 PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&B_ierr)); 282 ierr = PetscFPTrapPop();CHKERRQ(ierr); 283 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 284 break; 285 default: /* general */ 286 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 287 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&mpardiso->schur_work_size,&B_ierr)); 288 ierr = PetscFPTrapPop();CHKERRQ(ierr); 289 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 290 break; 291 } 292 mpardiso->schur_inverted = PETSC_TRUE; 293 PetscFunctionReturn(0); 294 } 295 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatMKLPardisoSolveSchur_Private" 298 static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat_MKL_PARDISO* mpardiso, PetscScalar *B, PetscScalar *X) 299 { 300 PetscScalar one=1.,zero=0.,*schur_rhs,*schur_sol; 301 PetscBLASInt B_N,B_Nrhs,B_ierr; 302 char type[2]; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 307 ierr = PetscBLASIntCast(mpardiso->schur_size,&B_N);CHKERRQ(ierr); 308 ierr = PetscBLASIntCast(mpardiso->nrhs,&B_Nrhs);CHKERRQ(ierr); 309 if (X == B && mpardiso->schur_inverted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"X and B cannot point to the same address"); 310 if (X != B) { /* using LAPACK *TRS subroutines */ 311 ierr = PetscMemcpy(X,B,B_N*B_Nrhs*sizeof(PetscScalar));CHKERRQ(ierr); 312 } 313 schur_rhs = B; 314 schur_sol = X; 315 switch (mpardiso->schur_solver_type) { 316 case 1: /* hermitian solver */ 317 if (mpardiso->schur_inverted) { /* BLAShemm should go here */ 318 PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 319 } else { 320 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 321 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,schur_sol,&B_N,&B_ierr)); 322 ierr = PetscFPTrapPop();CHKERRQ(ierr); 323 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRS Lapack routine %d",(int)B_ierr); 324 } 325 break; 326 case 2: /* symmetric solver */ 327 if (mpardiso->schur_inverted) { 328 PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 329 } else { 330 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 331 PetscStackCallBLAS("LAPACKsytrs",LAPACKsytrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr)); 332 ierr = PetscFPTrapPop();CHKERRQ(ierr); 333 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRS Lapack routine %d",(int)B_ierr); 334 } 335 break; 336 default: /* general */ 337 switch (mpardiso->iparm[12-1]) { 338 case 1: 339 sprintf(type,"C"); 340 break; 341 case 2: 342 sprintf(type,"T"); 343 break; 344 default: 345 sprintf(type,"N"); 346 break; 347 } 348 if (mpardiso->schur_inverted) { 349 PetscStackCallBLAS("BLASgemm",BLASgemm_(type,"N",&B_N,&B_Nrhs,&B_N,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N)); 350 } else { 351 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 352 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_(type,&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr)); 353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 354 if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRS Lapack routine %d",(int)B_ierr); 355 } 356 break; 357 } 358 PetscFunctionReturn(0); 359 } 360 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatFactorSetSchurIS_MKL_PARDISO" 364 PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is) 365 { 366 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 367 const PetscInt *idxs; 368 PetscInt size,i; 369 PetscMPIInt csize; 370 PetscBool sorted; 371 PetscErrorCode ierr; 372 373 PetscFunctionBegin; 374 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&csize);CHKERRQ(ierr); 375 if (csize > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MKL_PARDISO parallel Schur complements not yet supported from PETSc\n"); 376 ierr = ISSorted(is,&sorted);CHKERRQ(ierr); 377 if (!sorted) { 378 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS for MKL_PARDISO Schur complements needs to be sorted\n"); 379 } 380 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 381 if (mpardiso->schur_size != size) { 382 mpardiso->schur_size = size; 383 ierr = PetscFree2(mpardiso->schur,mpardiso->schur_work);CHKERRQ(ierr); 384 ierr = PetscFree(mpardiso->schur_idxs);CHKERRQ(ierr); 385 ierr = PetscFree(mpardiso->schur_pivots);CHKERRQ(ierr); 386 ierr = PetscBLASIntCast(PetscMax(mpardiso->n,2*size),&mpardiso->schur_work_size);CHKERRQ(ierr); 387 ierr = PetscMalloc2(size*size,&mpardiso->schur,mpardiso->schur_work_size,&mpardiso->schur_work);CHKERRQ(ierr); 388 ierr = PetscMalloc1(size,&mpardiso->schur_idxs);CHKERRQ(ierr); 389 } 390 ierr = PetscMemzero(mpardiso->perm,mpardiso->n*sizeof(INT_TYPE));CHKERRQ(ierr); 391 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 392 ierr = PetscMemcpy(mpardiso->schur_idxs,idxs,size*sizeof(PetscInt));CHKERRQ(ierr); 393 for (i=0;i<size;i++) mpardiso->perm[idxs[i]] = 1; 394 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 395 if (size) { /* turn on Schur switch if we the set of indices is not empty */ 396 mpardiso->iparm[36-1] = 2; 397 } 398 mpardiso->schur_factored = PETSC_FALSE; 399 mpardiso->schur_inverted = PETSC_FALSE; 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatFactorCreateSchurComplement_MKL_PARDISO" 405 PetscErrorCode MatFactorCreateSchurComplement_MKL_PARDISO(Mat F,Mat* S) 406 { 407 Mat St; 408 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 409 PetscScalar *array; 410 PetscErrorCode ierr; 411 412 PetscFunctionBegin; 413 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 414 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 415 416 ierr = MatCreate(PetscObjectComm((PetscObject)F),&St);CHKERRQ(ierr); 417 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mpardiso->schur_size,mpardiso->schur_size);CHKERRQ(ierr); 418 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 419 ierr = MatSetUp(St);CHKERRQ(ierr); 420 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 421 ierr = PetscMemcpy(array,mpardiso->schur,mpardiso->schur_size*mpardiso->schur_size*sizeof(PetscScalar));CHKERRQ(ierr); 422 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 423 *S = St; 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatFactorGetSchurComplement_MKL_PARDISO" 429 PetscErrorCode MatFactorGetSchurComplement_MKL_PARDISO(Mat F,Mat* S) 430 { 431 Mat St; 432 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 433 PetscErrorCode ierr; 434 435 PetscFunctionBegin; 436 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 437 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 438 439 ierr = MatCreateSeqDense(PetscObjectComm((PetscObject)F),mpardiso->schur_size,mpardiso->schur_size,mpardiso->schur,&St);CHKERRQ(ierr); 440 *S = St; 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatFactorInvertSchurComplement_MKL_PARDISO" 446 PetscErrorCode MatFactorInvertSchurComplement_MKL_PARDISO(Mat F) 447 { 448 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 449 PetscErrorCode ierr; 450 451 PetscFunctionBegin; 452 if (!mpardiso->iparm[36-1]) { /* do nothing */ 453 PetscFunctionReturn(0); 454 } 455 if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 456 ierr = MatMKLPardisoInvertSchur_Private(mpardiso);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatFactorFactorizeSchurComplement_MKL_PARDISO" 462 PetscErrorCode MatFactorFactorizeSchurComplement_MKL_PARDISO(Mat F) 463 { 464 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 if (!mpardiso->iparm[36-1]) { /* do nothing */ 469 PetscFunctionReturn(0); 470 } 471 if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 472 ierr = MatMKLPardisoFactorSchur_Private(mpardiso);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatFactorSolveSchurComplement_MKL_PARDISO" 478 PetscErrorCode MatFactorSolveSchurComplement_MKL_PARDISO(Mat F, Vec rhs, Vec sol) 479 { 480 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 481 PetscScalar *asol,*arhs; 482 PetscErrorCode ierr; 483 484 PetscFunctionBegin; 485 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 486 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 487 488 mpardiso->nrhs = 1; 489 ierr = VecGetArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 490 ierr = VecGetArray(sol,&asol);CHKERRQ(ierr); 491 ierr = MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 493 ierr = VecRestoreArray(sol,&asol);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatFactorSolveSchurComplementTranspose_MKL_PARDISO" 499 PetscErrorCode MatFactorSolveSchurComplementTranspose_MKL_PARDISO(Mat F, Vec rhs, Vec sol) 500 { 501 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 502 PetscScalar *asol,*arhs; 503 PetscInt oiparm12; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 508 else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before"); 509 510 mpardiso->nrhs = 1; 511 ierr = VecGetArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 512 ierr = VecGetArray(sol,&asol);CHKERRQ(ierr); 513 oiparm12 = mpardiso->iparm[12 - 1]; 514 mpardiso->iparm[12 - 1] = 2; 515 ierr = MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);CHKERRQ(ierr); 516 mpardiso->iparm[12 - 1] = oiparm12; 517 ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);CHKERRQ(ierr); 518 ierr = VecRestoreArray(sol,&asol);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatFactorSetSchurComplementSolverType_MKL_PARDISO" 524 PetscErrorCode MatFactorSetSchurComplementSolverType_MKL_PARDISO(Mat F, PetscInt sym) 525 { 526 Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr; 527 528 PetscFunctionBegin; 529 if (mpardiso->schur_factored && sym != mpardiso->schur_solver_type) { 530 SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Cannot change the Schur solver! Schur complement data has been already factored"); 531 } 532 mpardiso->schur_solver_type = sym; 533 PetscFunctionReturn(0); 534 } 535 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatDestroy_MKL_PARDISO" 538 PetscErrorCode MatDestroy_MKL_PARDISO(Mat A) 539 { 540 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr; 541 PetscErrorCode ierr; 542 543 PetscFunctionBegin; 544 if (mat_mkl_pardiso->CleanUp) { 545 mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 546 547 MKL_PARDISO (mat_mkl_pardiso->pt, 548 &mat_mkl_pardiso->maxfct, 549 &mat_mkl_pardiso->mnum, 550 &mat_mkl_pardiso->mtype, 551 &mat_mkl_pardiso->phase, 552 &mat_mkl_pardiso->n, 553 NULL, 554 NULL, 555 NULL, 556 NULL, 557 &mat_mkl_pardiso->nrhs, 558 mat_mkl_pardiso->iparm, 559 &mat_mkl_pardiso->msglvl, 560 NULL, 561 NULL, 562 &mat_mkl_pardiso->err); 563 } 564 ierr = PetscFree(mat_mkl_pardiso->perm);CHKERRQ(ierr); 565 ierr = PetscFree2(mat_mkl_pardiso->schur,mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 566 ierr = PetscFree(mat_mkl_pardiso->schur_idxs);CHKERRQ(ierr); 567 ierr = PetscFree(mat_mkl_pardiso->schur_pivots);CHKERRQ(ierr); 568 if (mat_mkl_pardiso->freeaij) { 569 ierr = PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);CHKERRQ(ierr); 570 ierr = PetscFree(mat_mkl_pardiso->a);CHKERRQ(ierr); 571 } 572 if (mat_mkl_pardiso->Destroy) { 573 ierr = (mat_mkl_pardiso->Destroy)(A);CHKERRQ(ierr); 574 } 575 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 576 577 /* clear composed functions */ 578 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 579 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSchurComplement_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorInvertSchurComplement_C",NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorFactorizeSchurComplement_C",NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplement_C",NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplementTranspose_C",NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurComplementSolverType_C",NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_PardisoSetCntl_C",NULL);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatMKLPardisoScatterSchur_Private" 593 static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce) 594 { 595 PetscFunctionBegin; 596 if (reduce) { /* data given for the whole matrix */ 597 PetscInt i,m=0,p=0; 598 for (i=0;i<mpardiso->nrhs;i++) { 599 PetscInt j; 600 for (j=0;j<mpardiso->schur_size;j++) { 601 schur[p+j] = whole[m+mpardiso->schur_idxs[j]]; 602 } 603 m += mpardiso->n; 604 p += mpardiso->schur_size; 605 } 606 } else { /* from Schur to whole */ 607 PetscInt i,m=0,p=0; 608 for (i=0;i<mpardiso->nrhs;i++) { 609 PetscInt j; 610 for (j=0;j<mpardiso->schur_size;j++) { 611 whole[m+mpardiso->schur_idxs[j]] = schur[p+j]; 612 } 613 m += mpardiso->n; 614 p += mpardiso->schur_size; 615 } 616 } 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatSolve_MKL_PARDISO" 622 PetscErrorCode MatSolve_MKL_PARDISO(Mat A,Vec b,Vec x) 623 { 624 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->spptr; 625 PetscErrorCode ierr; 626 PetscScalar *xarray; 627 const PetscScalar *barray; 628 629 PetscFunctionBegin; 630 mat_mkl_pardiso->nrhs = 1; 631 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 632 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 633 634 if (!mat_mkl_pardiso->schur) { 635 mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 636 } else { 637 mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 638 } 639 640 if (barray == xarray) { /* if the two vectors share the same memory */ 641 PetscScalar *work; 642 if (!mat_mkl_pardiso->schur_work) { 643 ierr = PetscMalloc1(mat_mkl_pardiso->n,&work);CHKERRQ(ierr); 644 } else { 645 work = mat_mkl_pardiso->schur_work; 646 } 647 mat_mkl_pardiso->iparm[6-1] = 1; 648 MKL_PARDISO (mat_mkl_pardiso->pt, 649 &mat_mkl_pardiso->maxfct, 650 &mat_mkl_pardiso->mnum, 651 &mat_mkl_pardiso->mtype, 652 &mat_mkl_pardiso->phase, 653 &mat_mkl_pardiso->n, 654 mat_mkl_pardiso->a, 655 mat_mkl_pardiso->ia, 656 mat_mkl_pardiso->ja, 657 NULL, 658 &mat_mkl_pardiso->nrhs, 659 mat_mkl_pardiso->iparm, 660 &mat_mkl_pardiso->msglvl, 661 (void*)xarray, 662 (void*)work, 663 &mat_mkl_pardiso->err); 664 if (!mat_mkl_pardiso->schur_work) { 665 ierr = PetscFree(work);CHKERRQ(ierr); 666 } 667 } else { 668 mat_mkl_pardiso->iparm[6-1] = 0; 669 MKL_PARDISO (mat_mkl_pardiso->pt, 670 &mat_mkl_pardiso->maxfct, 671 &mat_mkl_pardiso->mnum, 672 &mat_mkl_pardiso->mtype, 673 &mat_mkl_pardiso->phase, 674 &mat_mkl_pardiso->n, 675 mat_mkl_pardiso->a, 676 mat_mkl_pardiso->ia, 677 mat_mkl_pardiso->ja, 678 mat_mkl_pardiso->perm, 679 &mat_mkl_pardiso->nrhs, 680 mat_mkl_pardiso->iparm, 681 &mat_mkl_pardiso->msglvl, 682 (void*)barray, 683 (void*)xarray, 684 &mat_mkl_pardiso->err); 685 } 686 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 687 688 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 689 690 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 691 PetscInt shift = mat_mkl_pardiso->schur_size; 692 693 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 694 if (!mat_mkl_pardiso->schur_inverted) { 695 shift = 0; 696 } 697 698 if (!mat_mkl_pardiso->solve_interior) { 699 /* solve Schur complement */ 700 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);CHKERRQ(ierr); 701 ierr = MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);CHKERRQ(ierr); 702 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);CHKERRQ(ierr); 703 } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substitued to xarray[schur] will be neglected */ 704 PetscInt i; 705 for (i=0;i<mat_mkl_pardiso->schur_size;i++) { 706 xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.; 707 } 708 } 709 710 /* expansion phase */ 711 mat_mkl_pardiso->iparm[6-1] = 1; 712 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 713 MKL_PARDISO (mat_mkl_pardiso->pt, 714 &mat_mkl_pardiso->maxfct, 715 &mat_mkl_pardiso->mnum, 716 &mat_mkl_pardiso->mtype, 717 &mat_mkl_pardiso->phase, 718 &mat_mkl_pardiso->n, 719 mat_mkl_pardiso->a, 720 mat_mkl_pardiso->ia, 721 mat_mkl_pardiso->ja, 722 mat_mkl_pardiso->perm, 723 &mat_mkl_pardiso->nrhs, 724 mat_mkl_pardiso->iparm, 725 &mat_mkl_pardiso->msglvl, 726 (void*)xarray, 727 (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 728 &mat_mkl_pardiso->err); 729 730 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 731 mat_mkl_pardiso->iparm[6-1] = 0; 732 } 733 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 734 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatSolveTranspose_MKL_PARDISO" 740 PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A,Vec b,Vec x) 741 { 742 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr; 743 PetscInt oiparm12; 744 PetscErrorCode ierr; 745 746 PetscFunctionBegin; 747 oiparm12 = mat_mkl_pardiso->iparm[12 - 1]; 748 mat_mkl_pardiso->iparm[12 - 1] = 2; 749 ierr = MatSolve_MKL_PARDISO(A,b,x);CHKERRQ(ierr); 750 mat_mkl_pardiso->iparm[12 - 1] = oiparm12; 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatMatSolve_MKL_PARDISO" 756 PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A,Mat B,Mat X) 757 { 758 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->spptr; 759 PetscErrorCode ierr; 760 PetscScalar *barray, *xarray; 761 PetscBool flg; 762 763 PetscFunctionBegin; 764 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 765 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 766 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 767 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 768 769 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_pardiso->nrhs);CHKERRQ(ierr); 770 771 if(mat_mkl_pardiso->nrhs > 0){ 772 ierr = MatDenseGetArray(B,&barray); 773 ierr = MatDenseGetArray(X,&xarray); 774 775 if (barray == xarray) { 776 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"B and X cannot share the same memory location"); 777 } 778 779 if (!mat_mkl_pardiso->schur) { 780 mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 781 } else { 782 mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 783 } 784 mat_mkl_pardiso->iparm[6-1] = 0; 785 786 MKL_PARDISO (mat_mkl_pardiso->pt, 787 &mat_mkl_pardiso->maxfct, 788 &mat_mkl_pardiso->mnum, 789 &mat_mkl_pardiso->mtype, 790 &mat_mkl_pardiso->phase, 791 &mat_mkl_pardiso->n, 792 mat_mkl_pardiso->a, 793 mat_mkl_pardiso->ia, 794 mat_mkl_pardiso->ja, 795 mat_mkl_pardiso->perm, 796 &mat_mkl_pardiso->nrhs, 797 mat_mkl_pardiso->iparm, 798 &mat_mkl_pardiso->msglvl, 799 (void*)barray, 800 (void*)xarray, 801 &mat_mkl_pardiso->err); 802 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 803 804 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 805 PetscScalar *o_schur_work = NULL; 806 PetscInt shift = mat_mkl_pardiso->schur_size*mat_mkl_pardiso->nrhs,scale; 807 PetscInt mem = mat_mkl_pardiso->n*mat_mkl_pardiso->nrhs; 808 809 /* allocate extra memory if it is needed */ 810 scale = 1; 811 if (mat_mkl_pardiso->schur_inverted) { 812 scale = 2; 813 } 814 mem *= scale; 815 if (mem > mat_mkl_pardiso->schur_work_size) { 816 o_schur_work = mat_mkl_pardiso->schur_work; 817 ierr = PetscMalloc1(mem,&mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 818 } 819 820 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 821 if (!mat_mkl_pardiso->schur_inverted) { 822 shift = 0; 823 } 824 825 /* solve Schur complement */ 826 if (!mat_mkl_pardiso->solve_interior) { 827 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);CHKERRQ(ierr); 828 ierr = MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);CHKERRQ(ierr); 829 ierr = MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);CHKERRQ(ierr); 830 } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substitued to xarray[schur,n] will be neglected */ 831 PetscInt i,n,m=0; 832 for (n=0;n<mat_mkl_pardiso->nrhs;n++) { 833 for (i=0;i<mat_mkl_pardiso->schur_size;i++) { 834 xarray[mat_mkl_pardiso->schur_idxs[i]+m] = 0.; 835 } 836 m += mat_mkl_pardiso->n; 837 } 838 } 839 840 /* expansion phase */ 841 mat_mkl_pardiso->iparm[6-1] = 1; 842 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 843 MKL_PARDISO (mat_mkl_pardiso->pt, 844 &mat_mkl_pardiso->maxfct, 845 &mat_mkl_pardiso->mnum, 846 &mat_mkl_pardiso->mtype, 847 &mat_mkl_pardiso->phase, 848 &mat_mkl_pardiso->n, 849 mat_mkl_pardiso->a, 850 mat_mkl_pardiso->ia, 851 mat_mkl_pardiso->ja, 852 mat_mkl_pardiso->perm, 853 &mat_mkl_pardiso->nrhs, 854 mat_mkl_pardiso->iparm, 855 &mat_mkl_pardiso->msglvl, 856 (void*)xarray, 857 (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 858 &mat_mkl_pardiso->err); 859 if (o_schur_work) { /* restore original schur_work (minimal size) */ 860 ierr = PetscFree(mat_mkl_pardiso->schur_work);CHKERRQ(ierr); 861 mat_mkl_pardiso->schur_work = o_schur_work; 862 } 863 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 864 mat_mkl_pardiso->iparm[6-1] = 0; 865 } 866 } 867 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "MatFactorNumeric_MKL_PARDISO" 873 PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F,Mat A,const MatFactorInfo *info) 874 { 875 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(F)->spptr; 876 PetscErrorCode ierr; 877 878 PetscFunctionBegin; 879 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 880 ierr = (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_REUSE_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);CHKERRQ(ierr); 881 882 mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION; 883 MKL_PARDISO (mat_mkl_pardiso->pt, 884 &mat_mkl_pardiso->maxfct, 885 &mat_mkl_pardiso->mnum, 886 &mat_mkl_pardiso->mtype, 887 &mat_mkl_pardiso->phase, 888 &mat_mkl_pardiso->n, 889 mat_mkl_pardiso->a, 890 mat_mkl_pardiso->ia, 891 mat_mkl_pardiso->ja, 892 mat_mkl_pardiso->perm, 893 &mat_mkl_pardiso->nrhs, 894 mat_mkl_pardiso->iparm, 895 &mat_mkl_pardiso->msglvl, 896 NULL, 897 (void*)mat_mkl_pardiso->schur, 898 &mat_mkl_pardiso->err); 899 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err); 900 901 if (mat_mkl_pardiso->schur) { /* schur output from pardiso is in row major format */ 902 PetscInt j,k,n=mat_mkl_pardiso->schur_size; 903 if (!mat_mkl_pardiso->schur_solver_type) { 904 for (j=0; j<n; j++) { 905 for (k=0; k<j; k++) { 906 PetscScalar tmp = mat_mkl_pardiso->schur[j + k*n]; 907 mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n]; 908 mat_mkl_pardiso->schur[k + j*n] = tmp; 909 } 910 } 911 } else { /* we could use row-major in LAPACK routines (e.g. use 'U' instead of 'L'; instead, I prefer consistency between data structures and swap to column major */ 912 for (j=0; j<n; j++) { 913 for (k=0; k<j; k++) { 914 mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n]; 915 } 916 } 917 } 918 } 919 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 920 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 921 mat_mkl_pardiso->schur_factored = PETSC_FALSE; 922 mat_mkl_pardiso->schur_inverted = PETSC_FALSE; 923 PetscFunctionReturn(0); 924 } 925 926 #undef __FUNCT__ 927 #define __FUNCT__ "PetscSetMKL_PARDISOFromOptions" 928 PetscErrorCode PetscSetMKL_PARDISOFromOptions(Mat F, Mat A) 929 { 930 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->spptr; 931 PetscErrorCode ierr; 932 PetscInt icntl,threads=1; 933 PetscBool flg; 934 935 PetscFunctionBegin; 936 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_PARDISO Options","Mat");CHKERRQ(ierr); 937 938 ierr = PetscOptionsInt("-mat_mkl_pardiso_65","Number of threads to use within PARDISO","None",threads,&threads,&flg);CHKERRQ(ierr); 939 if (flg) PetscSetMKL_PARDISOThreads((int)threads); 940 941 ierr = PetscOptionsInt("-mat_mkl_pardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_pardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 942 if (flg) mat_mkl_pardiso->maxfct = icntl; 943 944 ierr = PetscOptionsInt("-mat_mkl_pardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_pardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 945 if (flg) mat_mkl_pardiso->mnum = icntl; 946 947 ierr = PetscOptionsInt("-mat_mkl_pardiso_68","Message level information","None",mat_mkl_pardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 948 if (flg) mat_mkl_pardiso->msglvl = icntl; 949 950 ierr = PetscOptionsInt("-mat_mkl_pardiso_69","Defines the matrix type","None",mat_mkl_pardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 951 if(flg){ 952 void *pt[IPARM_SIZE]; 953 mat_mkl_pardiso->mtype = icntl; 954 MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 955 #if defined(PETSC_USE_REAL_SINGLE) 956 mat_mkl_pardiso->iparm[27] = 1; 957 #else 958 mat_mkl_pardiso->iparm[27] = 0; 959 #endif 960 mat_mkl_pardiso->iparm[34] = 1; /* use 0-based indexing */ 961 } 962 ierr = PetscOptionsInt("-mat_mkl_pardiso_1","Use default values","None",mat_mkl_pardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 963 964 if (flg && icntl != 0) { 965 ierr = PetscOptionsInt("-mat_mkl_pardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_pardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 966 if (flg) mat_mkl_pardiso->iparm[1] = icntl; 967 968 ierr = PetscOptionsInt("-mat_mkl_pardiso_4","Preconditioned CGS/CG","None",mat_mkl_pardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 969 if (flg) mat_mkl_pardiso->iparm[3] = icntl; 970 971 ierr = PetscOptionsInt("-mat_mkl_pardiso_5","User permutation","None",mat_mkl_pardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 972 if (flg) mat_mkl_pardiso->iparm[4] = icntl; 973 974 ierr = PetscOptionsInt("-mat_mkl_pardiso_6","Write solution on x","None",mat_mkl_pardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 975 if (flg) mat_mkl_pardiso->iparm[5] = icntl; 976 977 ierr = PetscOptionsInt("-mat_mkl_pardiso_8","Iterative refinement step","None",mat_mkl_pardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 978 if (flg) mat_mkl_pardiso->iparm[7] = icntl; 979 980 ierr = PetscOptionsInt("-mat_mkl_pardiso_10","Pivoting perturbation","None",mat_mkl_pardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 981 if (flg) mat_mkl_pardiso->iparm[9] = icntl; 982 983 ierr = PetscOptionsInt("-mat_mkl_pardiso_11","Scaling vectors","None",mat_mkl_pardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 984 if (flg) mat_mkl_pardiso->iparm[10] = icntl; 985 986 ierr = PetscOptionsInt("-mat_mkl_pardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_pardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 987 if (flg) mat_mkl_pardiso->iparm[11] = icntl; 988 989 ierr = PetscOptionsInt("-mat_mkl_pardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_pardiso->iparm[12],&icntl,&flg);CHKERRQ(ierr); 990 if (flg) mat_mkl_pardiso->iparm[12] = icntl; 991 992 ierr = PetscOptionsInt("-mat_mkl_pardiso_18","Numbers of non-zero elements","None",mat_mkl_pardiso->iparm[17],&icntl,&flg);CHKERRQ(ierr); 993 if (flg) mat_mkl_pardiso->iparm[17] = icntl; 994 995 ierr = PetscOptionsInt("-mat_mkl_pardiso_19","Report number of floating point operations","None",mat_mkl_pardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 996 if (flg) mat_mkl_pardiso->iparm[18] = icntl; 997 998 ierr = PetscOptionsInt("-mat_mkl_pardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_pardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 999 if (flg) mat_mkl_pardiso->iparm[20] = icntl; 1000 1001 ierr = PetscOptionsInt("-mat_mkl_pardiso_24","Parallel factorization control","None",mat_mkl_pardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 1002 if (flg) mat_mkl_pardiso->iparm[23] = icntl; 1003 1004 ierr = PetscOptionsInt("-mat_mkl_pardiso_25","Parallel forward/backward solve control","None",mat_mkl_pardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 1005 if (flg) mat_mkl_pardiso->iparm[24] = icntl; 1006 1007 ierr = PetscOptionsInt("-mat_mkl_pardiso_27","Matrix checker","None",mat_mkl_pardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 1008 if (flg) mat_mkl_pardiso->iparm[26] = icntl; 1009 1010 ierr = PetscOptionsInt("-mat_mkl_pardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_pardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 1011 if (flg) mat_mkl_pardiso->iparm[30] = icntl; 1012 1013 ierr = PetscOptionsInt("-mat_mkl_pardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_pardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 1014 if (flg) mat_mkl_pardiso->iparm[33] = icntl; 1015 1016 ierr = PetscOptionsInt("-mat_mkl_pardiso_60","Intel MKL_PARDISO mode","None",mat_mkl_pardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 1017 if (flg) mat_mkl_pardiso->iparm[59] = icntl; 1018 } 1019 PetscOptionsEnd(); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatFactorMKL_PARDISOInitialize_Private" 1025 PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso) 1026 { 1027 PetscErrorCode ierr; 1028 PetscInt i; 1029 1030 PetscFunctionBegin; 1031 for ( i = 0; i < IPARM_SIZE; i++ ){ 1032 mat_mkl_pardiso->iparm[i] = 0; 1033 } 1034 for ( i = 0; i < IPARM_SIZE; i++ ){ 1035 mat_mkl_pardiso->pt[i] = 0; 1036 } 1037 /* Default options for both sym and unsym */ 1038 mat_mkl_pardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 1039 mat_mkl_pardiso->iparm[ 1] = 2; /* Metis reordering */ 1040 mat_mkl_pardiso->iparm[ 5] = 0; /* Write solution into x */ 1041 mat_mkl_pardiso->iparm[ 7] = 0; /* Max number of iterative refinement steps */ 1042 mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 1043 mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 1044 #if 0 1045 mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control*/ 1046 #endif 1047 mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */ 1048 mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on master */ 1049 1050 mat_mkl_pardiso->CleanUp = PETSC_FALSE; 1051 mat_mkl_pardiso->maxfct = 1; /* Maximum number of numerical factorizations. */ 1052 mat_mkl_pardiso->mnum = 1; /* Which factorization to use. */ 1053 mat_mkl_pardiso->msglvl = 0; /* 0: do not print 1: Print statistical information in file */ 1054 mat_mkl_pardiso->phase = -1; 1055 mat_mkl_pardiso->err = 0; 1056 1057 mat_mkl_pardiso->n = A->rmap->N; 1058 mat_mkl_pardiso->nrhs = 1; 1059 mat_mkl_pardiso->err = 0; 1060 mat_mkl_pardiso->phase = -1; 1061 1062 if(ftype == MAT_FACTOR_LU){ 1063 mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 1064 mat_mkl_pardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 1065 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 1066 1067 } else { 1068 mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 1069 mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */ 1070 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 1071 /* mat_mkl_pardiso->iparm[20] = 1; */ /* Apply 1x1 and 2x2 Bunch-Kaufman pivoting during the factorization process */ 1072 #if defined(PETSC_USE_DEBUG) 1073 mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */ 1074 #endif 1075 } 1076 ierr = PetscMalloc1(A->rmap->N*sizeof(INT_TYPE), &mat_mkl_pardiso->perm);CHKERRQ(ierr); 1077 for(i = 0; i < A->rmap->N; i++){ 1078 mat_mkl_pardiso->perm[i] = 0; 1079 } 1080 mat_mkl_pardiso->schur_size = 0; 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatFactorSymbolic_AIJMKL_PARDISO_Private" 1086 PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F,Mat A,const MatFactorInfo *info) 1087 { 1088 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->spptr; 1089 PetscErrorCode ierr; 1090 1091 PetscFunctionBegin; 1092 mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 1093 ierr = PetscSetMKL_PARDISOFromOptions(F,A);CHKERRQ(ierr); 1094 1095 /* throw away any previously computed structure */ 1096 if (mat_mkl_pardiso->freeaij) { 1097 ierr = PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);CHKERRQ(ierr); 1098 ierr = PetscFree(mat_mkl_pardiso->a);CHKERRQ(ierr); 1099 } 1100 ierr = (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_INITIAL_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);CHKERRQ(ierr); 1101 mat_mkl_pardiso->n = A->rmap->N; 1102 1103 mat_mkl_pardiso->phase = JOB_ANALYSIS; 1104 1105 MKL_PARDISO (mat_mkl_pardiso->pt, 1106 &mat_mkl_pardiso->maxfct, 1107 &mat_mkl_pardiso->mnum, 1108 &mat_mkl_pardiso->mtype, 1109 &mat_mkl_pardiso->phase, 1110 &mat_mkl_pardiso->n, 1111 mat_mkl_pardiso->a, 1112 mat_mkl_pardiso->ia, 1113 mat_mkl_pardiso->ja, 1114 mat_mkl_pardiso->perm, 1115 &mat_mkl_pardiso->nrhs, 1116 mat_mkl_pardiso->iparm, 1117 &mat_mkl_pardiso->msglvl, 1118 NULL, 1119 NULL, 1120 &mat_mkl_pardiso->err); 1121 if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d\n. Please check manual",mat_mkl_pardiso->err); 1122 1123 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 1124 1125 if (F->factortype == MAT_FACTOR_LU) { 1126 F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO; 1127 } else { 1128 F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO; 1129 } 1130 F->ops->solve = MatSolve_MKL_PARDISO; 1131 F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO; 1132 F->ops->matsolve = MatMatSolve_MKL_PARDISO; 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_PARDISO" 1138 PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #if !defined(PETSC_USE_COMPLEX) 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInertia_MKL_PARDISO" 1150 PetscErrorCode MatGetInertia_MKL_PARDISO(Mat F,int *nneg,int *nzero,int *npos) 1151 { 1152 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)F->spptr; 1153 1154 PetscFunctionBegin; 1155 if (nneg) *nneg = mat_mkl_pardiso->iparm[22]; 1156 if (npos) *npos = mat_mkl_pardiso->iparm[21]; 1157 if (nzero) *nzero = F->rmap->N -(mat_mkl_pardiso->iparm[22] + mat_mkl_pardiso->iparm[21]); 1158 PetscFunctionReturn(0); 1159 } 1160 #endif 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatCholeskyFactorSymbolic_AIJMKL_PARDISO" 1164 PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,const MatFactorInfo *info) 1165 { 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 ierr = MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);CHKERRQ(ierr); 1170 #if defined(PETSC_USE_COMPLEX) 1171 F->ops->getinertia = NULL; 1172 #else 1173 F->ops->getinertia = MatGetInertia_MKL_PARDISO; 1174 #endif 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatView_MKL_PARDISO" 1180 PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer) 1181 { 1182 PetscErrorCode ierr; 1183 PetscBool iascii; 1184 PetscViewerFormat format; 1185 Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr; 1186 PetscInt i; 1187 1188 PetscFunctionBegin; 1189 if (A->ops->solve != MatSolve_MKL_PARDISO) PetscFunctionReturn(0); 1190 1191 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1192 if (iascii) { 1193 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1194 if (format == PETSC_VIEWER_ASCII_INFO) { 1195 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO run parameters:\n");CHKERRQ(ierr); 1196 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO phase: %d \n",mat_mkl_pardiso->phase);CHKERRQ(ierr); 1197 for(i = 1; i <= 64; i++){ 1198 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO iparm[%d]: %d \n",i, mat_mkl_pardiso->iparm[i - 1]);CHKERRQ(ierr); 1199 } 1200 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO maxfct: %d \n", mat_mkl_pardiso->maxfct);CHKERRQ(ierr); 1201 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mnum: %d \n", mat_mkl_pardiso->mnum);CHKERRQ(ierr); 1202 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mtype: %d \n", mat_mkl_pardiso->mtype);CHKERRQ(ierr); 1203 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO n: %d \n", mat_mkl_pardiso->n);CHKERRQ(ierr); 1204 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO nrhs: %d \n", mat_mkl_pardiso->nrhs);CHKERRQ(ierr); 1205 ierr = PetscViewerASCIIPrintf(viewer,"MKL_PARDISO msglvl: %d \n", mat_mkl_pardiso->msglvl);CHKERRQ(ierr); 1206 } 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatGetInfo_MKL_PARDISO" 1214 PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info) 1215 { 1216 Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)A->spptr; 1217 1218 PetscFunctionBegin; 1219 info->block_size = 1.0; 1220 info->nz_used = mat_mkl_pardiso->nz; 1221 info->nz_allocated = mat_mkl_pardiso->nz; 1222 info->nz_unneeded = 0.0; 1223 info->assemblies = 0.0; 1224 info->mallocs = 0.0; 1225 info->memory = 0.0; 1226 info->fill_ratio_given = 0; 1227 info->fill_ratio_needed = 0; 1228 info->factor_mallocs = 0; 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatMkl_PardisoSetCntl_MKL_PARDISO" 1234 PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F,PetscInt icntl,PetscInt ival) 1235 { 1236 Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)F->spptr; 1237 1238 PetscFunctionBegin; 1239 if(icntl <= 64){ 1240 mat_mkl_pardiso->iparm[icntl - 1] = ival; 1241 } else { 1242 if(icntl == 65) 1243 PetscSetMKL_PARDISOThreads(ival); 1244 else if(icntl == 66) 1245 mat_mkl_pardiso->maxfct = ival; 1246 else if(icntl == 67) 1247 mat_mkl_pardiso->mnum = ival; 1248 else if(icntl == 68) 1249 mat_mkl_pardiso->msglvl = ival; 1250 else if(icntl == 69){ 1251 void *pt[IPARM_SIZE]; 1252 mat_mkl_pardiso->mtype = ival; 1253 MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 1254 #if defined(PETSC_USE_REAL_SINGLE) 1255 mat_mkl_pardiso->iparm[27] = 1; 1256 #else 1257 mat_mkl_pardiso->iparm[27] = 0; 1258 #endif 1259 mat_mkl_pardiso->iparm[34] = 1; 1260 } else if(icntl==70) { 1261 mat_mkl_pardiso->solve_interior = (PetscBool)!!ival; 1262 } 1263 } 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatMkl_PardisoSetCntl" 1269 /*@ 1270 MatMkl_PardisoSetCntl - Set Mkl_Pardiso parameters 1271 1272 Logically Collective on Mat 1273 1274 Input Parameters: 1275 + F - the factored matrix obtained by calling MatGetFactor() 1276 . icntl - index of Mkl_Pardiso parameter 1277 - ival - value of Mkl_Pardiso parameter 1278 1279 Options Database: 1280 . -mat_mkl_pardiso_<icntl> <ival> 1281 1282 Level: beginner 1283 1284 References: 1285 . Mkl_Pardiso Users' Guide 1286 1287 .seealso: MatGetFactor() 1288 @*/ 1289 PetscErrorCode MatMkl_PardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 1290 { 1291 PetscErrorCode ierr; 1292 1293 PetscFunctionBegin; 1294 ierr = PetscTryMethod(F,"MatMkl_PardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 /*MC 1299 MATSOLVERMKL_PARDISO - A matrix type providing direct solvers (LU) for 1300 sequential matrices via the external package MKL_PARDISO. 1301 1302 Works with MATSEQAIJ matrices 1303 1304 Use -pc_type lu -pc_factor_mat_solver_package mkl_pardiso to us this direct solver 1305 1306 Options Database Keys: 1307 + -mat_mkl_pardiso_65 - Number of threads to use within MKL_PARDISO 1308 . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time 1309 . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase 1310 . -mat_mkl_pardiso_68 - Message level information 1311 . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type 1312 . -mat_mkl_pardiso_1 - Use default values 1313 . -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix 1314 . -mat_mkl_pardiso_4 - Preconditioned CGS/CG 1315 . -mat_mkl_pardiso_5 - User permutation 1316 . -mat_mkl_pardiso_6 - Write solution on x 1317 . -mat_mkl_pardiso_8 - Iterative refinement step 1318 . -mat_mkl_pardiso_10 - Pivoting perturbation 1319 . -mat_mkl_pardiso_11 - Scaling vectors 1320 . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A 1321 . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching 1322 . -mat_mkl_pardiso_18 - Numbers of non-zero elements 1323 . -mat_mkl_pardiso_19 - Report number of floating point operations 1324 . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices 1325 . -mat_mkl_pardiso_24 - Parallel factorization control 1326 . -mat_mkl_pardiso_25 - Parallel forward/backward solve control 1327 . -mat_mkl_pardiso_27 - Matrix checker 1328 . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors 1329 . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode 1330 - -mat_mkl_pardiso_60 - Intel MKL_PARDISO mode 1331 1332 Level: beginner 1333 1334 For more information please check mkl_pardiso manual 1335 1336 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage 1337 1338 M*/ 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_pardiso" 1341 static PetscErrorCode MatFactorGetSolverPackage_mkl_pardiso(Mat A, const MatSolverPackage *type) 1342 { 1343 PetscFunctionBegin; 1344 *type = MATSOLVERMKL_PARDISO; 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatGetFactor_aij_mkl_pardiso" 1350 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A,MatFactorType ftype,Mat *F) 1351 { 1352 Mat B; 1353 PetscErrorCode ierr; 1354 Mat_MKL_PARDISO *mat_mkl_pardiso; 1355 PetscBool isSeqAIJ,isSeqBAIJ,isSeqSBAIJ; 1356 1357 PetscFunctionBegin; 1358 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 1359 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 1360 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 1361 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1362 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1363 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1364 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 1365 ierr = MatSeqBAIJSetPreallocation(B,A->rmap->bs,0,NULL);CHKERRQ(ierr); 1366 ierr = MatSeqSBAIJSetPreallocation(B,A->rmap->bs,0,NULL);CHKERRQ(ierr); 1367 1368 ierr = PetscNewLog(B,&mat_mkl_pardiso);CHKERRQ(ierr); 1369 B->spptr = mat_mkl_pardiso; 1370 1371 ierr = MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso);CHKERRQ(ierr); 1372 if (ftype == MAT_FACTOR_LU) { 1373 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO; 1374 B->factortype = MAT_FACTOR_LU; 1375 mat_mkl_pardiso->needsym = PETSC_FALSE; 1376 if (isSeqAIJ) { 1377 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1378 } else if (isSeqBAIJ) { 1379 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1380 } else if (isSeqSBAIJ) { 1381 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead"); 1382 } else { 1383 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU with %s format",((PetscObject)A)->type_name); 1384 } 1385 mat_mkl_pardiso->schur_solver_type = 0; 1386 #if defined(PETSC_USE_COMPLEX) 1387 mat_mkl_pardiso->mtype = 13; 1388 #else 1389 if (A->structurally_symmetric) { 1390 mat_mkl_pardiso->mtype = 1; 1391 } else { 1392 mat_mkl_pardiso->mtype = 11; 1393 } 1394 #endif 1395 } else { 1396 #if defined(PETSC_USE_COMPLEX) 1397 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name); 1398 #endif 1399 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO; 1400 B->factortype = MAT_FACTOR_CHOLESKY; 1401 if (isSeqAIJ) { 1402 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1403 } else if (isSeqBAIJ) { 1404 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1405 } else if (isSeqSBAIJ) { 1406 mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij; 1407 } else { 1408 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with %s format",((PetscObject)A)->type_name); 1409 } 1410 mat_mkl_pardiso->needsym = PETSC_TRUE; 1411 if (A->spd_set && A->spd) { 1412 mat_mkl_pardiso->schur_solver_type = 1; 1413 mat_mkl_pardiso->mtype = 2; 1414 } else { 1415 mat_mkl_pardiso->schur_solver_type = 2; 1416 mat_mkl_pardiso->mtype = -2; 1417 } 1418 } 1419 mat_mkl_pardiso->Destroy = B->ops->destroy; 1420 B->ops->destroy = MatDestroy_MKL_PARDISO; 1421 B->ops->view = MatView_MKL_PARDISO; 1422 B->factortype = ftype; 1423 B->ops->getinfo = MatGetInfo_MKL_PARDISO; 1424 B->assembled = PETSC_TRUE; 1425 1426 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 1427 ierr = PetscStrallocpy(MATSOLVERMKL_PARDISO,&B->solvertype);CHKERRQ(ierr); 1428 1429 1430 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_pardiso);CHKERRQ(ierr); 1431 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MKL_PARDISO);CHKERRQ(ierr); 1432 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1433 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSchurComplement_C",MatFactorGetSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1434 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorInvertSchurComplement_C",MatFactorInvertSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1435 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorFactorizeSchurComplement_C",MatFactorFactorizeSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1436 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplement_C",MatFactorSolveSchurComplement_MKL_PARDISO);CHKERRQ(ierr); 1437 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplementTranspose_C",MatFactorSolveSchurComplementTranspose_MKL_PARDISO);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurComplementSolverType_C",MatFactorSetSchurComplementSolverType_MKL_PARDISO);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_PardisoSetCntl_C",MatMkl_PardisoSetCntl_MKL_PARDISO);CHKERRQ(ierr); 1440 1441 *F = B; 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatSolverPackageRegister_MKL_Pardiso" 1447 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_Pardiso(void) 1448 { 1449 PetscErrorCode ierr; 1450 1451 PetscFunctionBegin; 1452 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1453 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1454 ierr = MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458